{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "e86ad46f-fcca-474d-8696-ae22e7174926",
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "\n",
    "import matplotlib.pyplot as plt\n",
    "import pandas as pd\n",
    "import requests\n",
    "from tqdm import tqdm\n",
    "\n",
    "\n",
    "def chat(prompt, model=\"gpt-3.5-turbo\", timeout=60):\n",
    "    \"\"\"Send one user message to the chat-completions endpoint.\n",
    "\n",
    "    The bearer token is read from the API2D_API_KEY environment variable so\n",
    "    that no credential is stored in the notebook.\n",
    "\n",
    "    Args:\n",
    "        prompt: Text sent as the content of a single user-role message.\n",
    "        model: Model name placed in the request payload.\n",
    "        timeout: Seconds passed to requests.post so a stalled connection\n",
    "            cannot block the cell indefinitely.\n",
    "\n",
    "    Returns:\n",
    "        The requests.Response object, unmodified; callers read\n",
    "        status_code and json() themselves.\n",
    "\n",
    "    Raises:\n",
    "        RuntimeError: If API2D_API_KEY is unset or empty.\n",
    "    \"\"\"\n",
    "    api_key = os.environ.get(\"API2D_API_KEY\")\n",
    "    if not api_key:\n",
    "        raise RuntimeError(\"Set the API2D_API_KEY environment variable before calling chat().\")\n",
    "    url = \"https://openai.api2d.net/v1/chat/completions\"  # service host + endpoint path\n",
    "    headers = {\n",
    "        \"Content-Type\": \"application/json\",\n",
    "        \"Authorization\": f\"Bearer {api_key}\",\n",
    "    }\n",
    "    data = {\n",
    "        \"model\": model,\n",
    "        \"messages\": [{\"role\": \"user\", \"content\": prompt}],\n",
    "    }\n",
    "    response = requests.post(url, headers=headers, json=data, timeout=timeout)\n",
    "    return response\n",
    "    "
   ]
  },
  {
   "cell_type": "markdown",
   "id": "56233a6d-638c-4937-a660-cbcb9e2b3665",
   "metadata": {},
   "source": [
    "# 任务1：初识ChatGPT"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 54,
   "id": "96921dd3",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Status Code 200\n",
      "JSON Response  {'id': 'chatcmpl-7qM0rct9RML77nWwpjgOH5X8va59F', 'object': 'chat.completion', 'created': 1692712029, 'model': 'gpt-3.5-turbo-0613', 'choices': [{'index': 0, 'message': {'role': 'assistant', 'content': '当然可以！给你讲一个笑话：\\n\\n有一天，小明去参加英语口语比赛。比赛开始，主持人问小明：“请你用英语造一个句子，句子里要包含一个关于鸟的单词。”小明想了一下，然后大声说道：“Yesterday，I opened the door of my penguin，and it fly fly fly！（昨天，我打开了我的企鹅的门，它飞了起来，飞啊飞啊飞！）”\\n\\n全场笑声爆发，主持人惊讶地问：“你有一只会飞的企鹅？”小明回答：“当然没有！因为，我的句子是‘I opened the door of my penguin，and it fly fly fly！’这样企鹅也能飞了嘛！”'}, 'finish_reason': 'stop'}], 'usage': {'prompt_tokens': 18, 'completion_tokens': 221, 'total_tokens': 239, 'pre_token_count': 4096, 'pre_total': 42, 'adjust_total': 39, 'final_total': 3}}\n"
     ]
    }
   ],
   "source": [
    "# Smoke test: send one prompt (a request for a joke) and show the HTTP status\n",
    "# code followed by the decoded JSON body of the reply.\n",
    "response = chat(\"你好！给我讲个笑话。\")\n",
    "print(\"Status Code\", response.status_code)\n",
    "print(\"JSON Response \", response.json())"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "bf00ae6a-7629-47c5-a20b-2da0164faf04",
   "metadata": {},
   "source": [
    "# 任务2：ChatGPT编写正则"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "5e106d52-e18c-4147-a4ee-311d0c941379",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Five short English dataset descriptions; the following cells send each one to chat() inside a prompt.\n",
    "s_list = ['Enron Dataset: Over half a million anonymized emails from over 100 users. It’s one of the few publically available collections of “real” emails available for study and training sets.',\n",
    "'Google Blogger Corpus: Nearly 700,000 blog posts from blogger.com. The meat of the blogs contain commonly occurring English words, at least 200 of them in each entry.',\n",
    "'SMS Spam Collection: Excellent dataset focused on spam. Nearly 6000 messages tagged as legitimate or spam messages with a useful subset extracted directly from Grumbletext.',\n",
    "'Recommender Systems Datasets: Datasets from a variety of sources, including fitness tracking, video games, song data, and social media. Labels include star ratings, time stamps, social networks, and images.',\n",
    "'Project Gutenberg: Extensive collection of book texts. These are public domain and available in a variety of languages, spanning a long period of time.']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "id": "6c2677fb-f764-4d5e-815b-f99561a08cee",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|████████████████████████████████████████████████████████████████████████████████████| 5/5 [00:21<00:00,  4.22s/it]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'id': 'chatcmpl-7qLP24IuTVppIRhm4ov4x8U97nckw', 'object': 'chat.completion', 'created': 1692709684, 'model': 'gpt-3.5-turbo-0613', 'choices': [{'index': 0, 'message': {'role': 'assistant', 'content': \"首字母大写的单词如下所示：\\n\\nEnron\\nDataset\\nOver\\nIt's\\nOne\\nOf\\nThe\\nFew\\nCollections\\nReal\\nEmails\\nFor\\nStudy\\nAnd\\nTraining\\nSets\"}, 'finish_reason': 'stop'}], 'usage': {'prompt_tokens': 73, 'completion_tokens': 49, 'total_tokens': 122, 'pre_token_count': 4096, 'pre_total': 42, 'adjust_total': 40, 'final_total': 2}}\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    }
   ],
   "source": [
    "# Words whose first letter is capitalized.\n",
    "# One request is sent per sentence; the {} placeholder in the template is\n",
    "# filled with the sentence and each decoded JSON reply is collected.\n",
    "result_list = []\n",
    "prompt = '给你一句英文文本\"{}\", 帮我寻找首字母大写的单词'\n",
    "for s in tqdm(s_list):\n",
    "    response = chat(prompt.format(s))\n",
    "    result_list.append(response.json())\n",
    "# Only the reply for the first sentence is printed.\n",
    "print(result_list[0])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "id": "9a652d12-5f2f-4f47-bcce-3b7210b3fdb9",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|████████████████████████████████████████████████████████████████████████████████████| 5/5 [00:22<00:00,  4.59s/it]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'id': 'chatcmpl-7qLPakIQnxroUDLhStqJjaZttU1wU', 'object': 'chat.completion', 'created': 1692709718, 'model': 'gpt-3.5-turbo-0613', 'choices': [{'index': 0, 'message': {'role': 'assistant', 'content': 'Enron, Dataset, Over, It’s, one, of, the, few, available, collections, real, emails, for, study, and, training.'}, 'finish_reason': 'stop'}], 'usage': {'prompt_tokens': 79, 'completion_tokens': 34, 'total_tokens': 113, 'pre_token_count': 4096, 'pre_total': 42, 'adjust_total': 41, 'final_total': 1}}\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    }
   ],
   "source": [
    "# Capitalized words whose length is below 10.\n",
    "# One request is sent per sentence; the {} placeholder in the template is\n",
    "# filled with the sentence and each decoded JSON reply is collected.\n",
    "result_list = []\n",
    "prompt = '给你一句英文文本\"{}\", 帮我寻找首字母大写且长度小于10的单词'\n",
    "for s in tqdm(s_list):\n",
    "    response = chat(prompt.format(s))\n",
    "    result_list.append(response.json())\n",
    "# Only the reply for the first sentence is printed.\n",
    "print(result_list[0])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "id": "5006deb2-514e-4988-b23a-bee1e492ebb2",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|████████████████████████████████████████████████████████████████████████████████████| 5/5 [00:19<00:00,  3.98s/it]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'id': 'chatcmpl-7qLQ4Fv9cReSDulz1D7A7mhFVTsMK', 'object': 'chat.completion', 'created': 1692709748, 'model': 'gpt-3.5-turbo-0613', 'choices': [{'index': 0, 'message': {'role': 'assistant', 'content': '末尾为标点符号的单词有：\\n- Dataset\\n- users\\n- sets'}, 'finish_reason': 'stop'}], 'usage': {'prompt_tokens': 76, 'completion_tokens': 23, 'total_tokens': 99, 'pre_token_count': 4096, 'pre_total': 42, 'adjust_total': 41, 'final_total': 1}}\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    }
   ],
   "source": [
    "# Words that end with a punctuation mark.\n",
    "# One request is sent per sentence; the {} placeholder in the template is\n",
    "# filled with the sentence and each decoded JSON reply is collected.\n",
    "result_list = []\n",
    "prompt = '给你一句英文文本\"{}\", 帮我寻找末尾为标点符号的单词'\n",
    "for s in tqdm(s_list):\n",
    "    response = chat(prompt.format(s))\n",
    "    result_list.append(response.json())\n",
    "# Only the reply for the first sentence is printed.\n",
    "print(result_list[0])"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "ce9fdf2c-5ff7-4fd7-a7a7-1d7cb527c28f",
   "metadata": {},
   "source": [
    "# 任务3：ChatGPT自动EDA"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "id": "9e655aa8-76d9-497f-a71b-b9edb6afc75a",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Index(['rate_marriage', 'age', 'yrs_married', 'children', 'religious',\n",
       "       'affairs'],\n",
       "      dtype='object')"
      ]
     },
     "execution_count": 35,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Load the affairs dataset over HTTP into a DataFrame and display its column names.\n",
    "# NOTE(review): pandas is already imported in the notebook's first cell, so this import is redundant.\n",
    "import pandas as pd\n",
    "df = pd.read_csv('https://mirror.coggle.club/dataset/affairs.txt')\n",
    "df.columns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 47,
   "id": "9e0f31ce-8365-49b7-bcee-9cbd62af587e",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'id': 'chatcmpl-7qLtRdW3LUH6o7aKnm6Up5EIAHpAN',\n",
       " 'object': 'chat.completion',\n",
       " 'created': 1692711569,\n",
       " 'model': 'gpt-3.5-turbo-0613',\n",
       " 'choices': [{'index': 0,\n",
       "   'message': {'role': 'assistant',\n",
       "    'content': \"你可以使用pandas中的scatter方法来绘制age列和yrs_married列的散点图。具体代码如下：\\n```python\\nimport pandas as pd\\nimport matplotlib.pyplot as plt\\n\\n# 创建示例DataFrame\\ndata = {'rate_marriage': [3, 4, 4, 5, 5, 2],\\n        'age': [25, 32, 42, 35, 27, 45],\\n        'yrs_married': [1, 7, 15, 4, 2, 20],\\n        'children': [0, 3, 1, 1, 1, 5],\\n        'religious': [3, 1, 1, 3, 1, 2],\\n        'affairs': [0.111, 4.0, 0.5, 1.0, 2.0, 4.3]}\\n\\ndf = pd.DataFrame(data)\\n\\n# 绘制散点图\\ndf.plot.scatter(x='age', y='yrs_married')\\n\\n# 显示图形\\nplt.show()\\n```\\n运行以上代码，将显示age列和yrs_married列的散点图。\"},\n",
       "   'finish_reason': 'stop'}],\n",
       " 'usage': {'prompt_tokens': 64,\n",
       "  'completion_tokens': 266,\n",
       "  'total_tokens': 330,\n",
       "  'pre_token_count': 4096,\n",
       "  'pre_total': 42,\n",
       "  'adjust_total': 38,\n",
       "  'final_total': 4}}"
      ]
     },
     "execution_count": 47,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Four EDA questions for the chat endpoint:\n",
    "#   prompt_1 - how to count missing values in a table with pandas\n",
    "#   prompt_2 - distribution of the affairs column as a pie chart\n",
    "#   prompt_3 - distribution of the yrs_married column as a bar chart\n",
    "#   prompt_4 - scatter plot of age against yrs_married\n",
    "# Prompts 2-4 embed the DataFrame's column names in the question text.\n",
    "prompt_1 = \"如何使用pandas对表格进行缺失值统计？请列出具体的代码\"\n",
    "prompt_2 = \"给定dataframe列名如下：['rate_marriage', 'age', 'yrs_married', 'children', 'religious','affairs']。请问如何使用pandas来统计affairs列的分布并绘制饼图？请列出具体的代码\"\n",
    "prompt_3 = \"给定dataframe列名如下：['rate_marriage', 'age', 'yrs_married', 'children', 'religious','affairs']。请问如何使用pandas来统计yrs_married列的分布并绘制柱状图？请列出具体的代码\"\n",
    "prompt_4 = \"给定dataframe列名如下：['rate_marriage', 'age', 'yrs_married', 'children', 'religious','affairs']。请问如何使用pandas来绘制age列和yrs_married列的散点图\"\n",
    "\n",
    "# Only prompt_4 is sent here; prompt_1 to prompt_3 are defined but not used in this cell.\n",
    "response = chat(prompt_4)\n",
    "response.json()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "id": "fa20ce29-48a2-4f5e-9a89-ac3d0dbe31cc",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "rate_marriage    0\n",
      "age              0\n",
      "yrs_married      0\n",
      "children         0\n",
      "religious        0\n",
      "affairs          0\n",
      "dtype: int64\n"
     ]
    }
   ],
   "source": [
    "# Count the missing (NaN) entries in each column of df.\n",
    "missing_values = df.isna().sum()\n",
    "print(missing_values)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "id": "562cd2c5-9bd5-4450-bf3c-ecca5555d80c",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "affairs的分布： {0: 4313, 1: 2053}\n"
     ]
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAOcAAAD3CAYAAADmIkO7AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/YYfK9AAAACXBIWXMAAAsTAAALEwEAmpwYAAAcDUlEQVR4nO3deXxU9b3/8dd3JpNA2CGKgspRFKSuqKjoxaUubRmrRVttq9ddL2pbrbZ1fnrbjr1Vx4fVuqHYVkVaW2t7vRQdtbZ1wwUXLCiLiuIoS9hlIIGs8/39cQ46hASSkMznO3M+z8cjD5JM5nzfo3nne+bMmfM11lqUUu6JSAdQSrVOy6mUo7ScSjlKy6mUo7ScSjlKy6mUo7ScLRhjjjbGLDTG1BhjvmGMGWyMeckYs8EYc1s77l9jjNmrQFn3CMaLdtH2Jhtjfhp8fpwxZklXbDfY3jhjzPtdtb0wCG05jTEvGGM+M8ZUtLjpF8A91tre1tppwKXAaqCvtfaa7W03uN+iLsh3vjGmOShfjTHmY2PMQ8aYEXljfRqM19yObb3cjuwTrbX/s6PZgzGtMWbvvG3PsNaO7Ipth0Uoy2mM8YBxgAVObXHzMGBei6/n2y44W6MTM9xr1treQD/gRGATMMsYs/+OZumCbKq7WWtD9wH8DHgFuB14Mu/7HwE5/BLUAH8CGoGG4OsTgcOB14B1QDVwD1Cetw0L7B18PgW4D3gKqA3uPx6YD2wAlgI/aiPj+cDLrXz/SeCvwedeMF5Z3n0WBdv+GDgbGAXUAc3BY1i3jWxTgF8Gtx8HLAGuw99zyABn5+V4Abi4tbzAS0Gu2mDMszZvL+/nRwXbWIf/x/DUvNumAJOAdPBYXgeGS//eFPz3VDqAyIOGD4HLgUOD8g3Ouy0DnNjiF+WXeV8fChwJlAXlWABclXd7y3JmgaPx91J6BIUeF9w+ADikjYxtlfNCYEXw+eflBHoB64GRwW27Avu1ta02srUsZxP+H7AK4NigbJu332Y5W/53yNvekuDzWPD/4DqgHPhyUMKRednW4v8hLAMeAR6V/r0p9EfodmuNMf+Bv6v6mLV2Fv5s+d323t9aO8taO9Na22StzQD34//ituVv1tpXrLU5a20d/h+DLxlj+lprP7PWvt3Bh7AMGNjGbTlgf2NMT2tttbV2Xhs/11a21vzUWltvrX0RfyY7s4N5W3Mk0BtIWWsbrLXP4e8RfCfvZx631r5hrW3CL+fBXTBuUQldOYHzgGettauDr/8YfK9djDEjjDFPGmOWG2PWAzcBVdu4y+IWX5+Bv2v7iTHmRWPM2A5kBxiKP6tswVpbi7/7OBGoNsakjTH7bmdbLbO19Fmw3c0+AYZ0JGwbhgCLrbW5Ftsemvf18rzPN+KXOVRCVU5jTE/8v/zHBuVaDvwQOMgYc1A7N3Mf8B6wj7W2L/6umdnGz29xIMla+6a19jRgZ2Aa8FjHHgUTgBmtDmTt3621J+Hv0r4H/La1DG1la8UAY0yvvK/3wJ+5wd/Frcy7bZftbCvfMmB3Y0z+798e+M/BVSBU5QS+gX9g5Ev4u0kH4x+YmAGc285t9MF/blcTzEyXtXdwY0y5MeZsY0w/a21jsJ1tvgwS3C9qjNnTGHM3/nO3G1r5mcHGmFODMtXjH4jZvO0VwG7GmPL2Zs1zQ5B7HHAK8Jfg+7OB040xlcFLJhe1uN8KoK3Xe1/HL/dPjDExY8xxwNeBRzuRr2SFrZznAQ9Z//XB5Zs/8I+4nm2MKWvHNn6E/xx1A/7M9OcOZvhPIBPsEk8EztnGz441xtTgl/gFoC8wxlr7bis/GwGuwZ+V1uI/D748uO05/COiy40xq1u5b1uWA58F23wEmGitfS+47df4R7FXAA8Ht+dLAg8bY9YZY7Z4nmqtbcB/Cetr+EeC7wXOzdu2AkxwdEwp5ZiwzZxKFQ0tp1KO0nIq5Sgtp1KO0nIq5Sgtp1KO
0nIq5Sgtp1KO0nIq5Sgtp1KO0nIq5Sgtp1KO0nIq5Sgtp1KO0nIq5Sgtp1KO0nIq5Sgtp1KO0nIq5Sgtp1KO0nIq5Sgtp1KO0nIq5Sgtp1KO0nIq5Sgtp1KOas/aIEqIl0hH8RcD8vBX8cr/GIy/dkrP4COG//8zgr+QUS3+0nmb/12Pv+bJkhYfSzOp+HYXU1KFp2ulOMJLpPfEX8n5S/grn40C9sFfVbo71eOvzv1u3sc7mVR82TbvpbpdKMtpjPkqcCcQBX5nrU0VcnwvkY4Bo/GXfD8q+HfXQmZoh8XAi8BLwIuZVPwD4TyhE7pyGmOiwAfASfi7dW8C37HWzu/Ocb1Eugp/ReuvAyfj75IWk+XA8/gL/j6VScVrZOOUvjCWcyyQtNZ+Jfj6/wFYa2/u6rG8RHoY/lLwpwJjKZ0DcPXAP4HHgemZVLwja36qdgrjAaGh+Ltsmy0BjuiqjXuJdC/gm/gL9R7HtpekL1YVQDz4aPYS6WfxFxJ+IpOKN4kmKyFhLGdrZdnh3QcvkR4LXIpfzN47ur0iEsVfofprwAovkX4Y+F0mFV8oG6v46W7tDuzWeol0BJiAv9z72K7MWQL+BdySScX/IR2kWIWxnGX4B4ROAJbiHxD6rrV2Xnu34SXSlcCFwFXA8G6IWUreAm4BHs+k4jnpMMUkdOUEMMaMB+7A3yV70Fp7Y3vu5yXS5cBlwPXATt0WsDS9j1/SqXrSQ/uEspwdFey+ngP8AhgmHKfYzQeuy6Tif5MO4jot53Z4iXQcuBk4QDpLiXkJ+GEmFX9bOoirtJxt8BJpD7gL/6QB1T1ywEPATzKp+FrpMK7RcrbgJdJlwI+An+GfUK6630rg+5lU/DHpIC7RcubxEulDgQeAg6SzhNR04PJMKr5UOogLtJx8fsAnAdxAOE/McEkWuCaTij8gHURa6MvpJdJDgd8Dx0tnUVv4M3BJJhXfIB1ESqjL6SXS38DfjR0oHEW17kPgrLAe0Q1lOYMrDNwGXCmdRW1XPfCjTCp+j3SQQgtdOb1Euj/wGP77OVXx+ANwUSYVb5AOUiihKqeXSI8AngBGSGdRnTIDmJBJxddIBymEUnnz73Z5ifTJwOtoMYvZOGBm8Ee25IWinF4i/R0gDfQXjqJ23N74BT1WOkh3K/lyeon0JfjPV/T1y9IxAHjGS6S/Kh2kO5V0Ob1E+mrgN5T44wypHsA0L5Eu2XOfS/aX1kukf47/cokqXRXA/3qJ9BnSQbpDSZbTS6SvA5LSOVRBxIBHvUT629JBulrJvZTiJdITgfukc6iCawJOzaTiT0sH6SolVU4vkT4L+CMlukegtqsW+HImFX9DOkhXKJlyeon0V/BPMIhJZ1GiVgNHlcKlOUuinF4ifRDwCtBLOotywsf4BV0uHWRHFP3uX7AGyTS0mOoLewLTvUS6u1do61ZFXc7gkiKP4a9fqVS+MUBRv5OlqMsJ3I6+SVq17WIvkb5YOkRnFe1zTi+RPg+YIp1DOa8eGJdJxd+UDtJRRVlOL5HeG5iNPs9U7bMYGF1sbzUrut3a4HnmI2gxVfvtDtwrHaKjiq6c+NeTPVw6hCo6ZxbbKX5FtVvrJdJH4V/GPyqdRRWltcD+mVS8WjpIexTNexy9RLonMBWhYubqaljz9F00rP4UgKrxV7L+rek0rl0S3F5LpEcvhlxw91b3Xf/mNGrmPAsGYjt5VI2/ClNWzmcvPMSmRbMo33lPqk65BoCauc+Rq9tA38NOK9yDC4+B+CtwnyIdpD2Kabf2OgTXwlz7r9/QY69DGXrJZIZceDexQbuz02nXMuSCuxlywd1UjjyKyhFHbXW/pg2rWT/rCXY579cMueheyOWoXfASufpa6pcuYMiF92BtjoZVGXKN9dTO/Sd9RscFHmFoxL1E+nzpEO1RFOUMrhnzY6nxc/UbqVs8j94HngyAicaI9PhiZXlrLRvfe5leo45pYwPN2KYGbK4Z21RPtPdAwGCbm7DW
YpsaMJEo6994nD6HnoqJFs0OTbG61Uuknb9WcbH8FkzCf2OtiKZ1y4lW9mXNU3fQsPJjKnbZmwEnXEqkvAcA9UvmEe3Vn9jAoVvdt6xPFX0Pn8DS+y7AlJXTY8/R9NzzEAAqRx5F9ZQf0GPYQZiKXjRUf0D/o79T0McWUlXATcBE6SDb4vzMGRxhO1Eyg80107D8I/qMHs+QC+7CxCpYP/Mvn99eO//FNmfN5roaNi58naETH2C3K6ZiG+upmfc8AP2O+CZDLribgV++mOyMP9B/3DlsmPN3Vk1Lse7VRwvy2ELsEi+RPlg6xLY4Xc7gINCvpHOU9aki2qeKiiEjAagceTQNKz4C/OJu/OA1KvdtvZx1mdmU9RtMtLIfJlpG5Yix1C9dsMXPbN5W2YCh1M59jp2+kaBx1Sc0rtXFtrpRBLhTOsS2OF1O4Cpg633FAov2HkBZ3yoa1/hHZus+mUOsag//88xsYoN2o6xvVav3Leu7Ew3L3ifXWIe11r/voN23+Jl1M/5Av/84G3JNYHP+N00E21TffQ9KARzjJdKnS4doi7PlDJ6wXyudY7OBJ05k9ZO/YtmD36Nh5cf0HXsmALULXtpql7ZpwxpW/OXnAFQMGUnlyKOpnnIV1Q9eAdbS56Avrui48YPXKN9lH8r6DCLSozcVQ/Zl2QNXgIHynfcq3AMMr6SXSBvpEK1x9iQEL5G+GX/NTKW625mZVPwv2/+xwnKynF4ivROwCOi9vZ9VqgvMBQ7MpOJOlcHV3dqr0WKqwtkf+JZ0iJacmzm9RLoX/lt8BkhnUaEyDzjApdnTxZnzPLSYqvD2w7E1W50qZ3DUTFebVlK+Jx0gn1PlBMaj62cqOXEvkfakQ2zmWjmvkg6gQi0CXC4dYjNnDgh5ifQw/IsBO/mCsAqNtcBumVR8k3QQl2bOs9FiKnkDgVOlQ4Bb5TxHOoBSASfet+fEbq2XSB8KvCWdQ6lAPTA4k4pnJUO4MnP+p3QApfJUABOkQ7hSTudOnVKhJ75rK75b6yXSo4G3RUMotbUm/F3btVIBXJg5x0sHUKoVZcAJkgG0nEq17WTJwUXLGVzt4AjJDEptg+iJ8NIz50no0grKXcO8RHqk1ODS5RwnPL5S2yM2e0qX80jh8ZXaHrEJRKycwTVpD5QaX6l2OkxqYMmZ81AgJji+Uu2xl5dIi1yZQ7KcukurioXI7Ck9cypVDER+VyXLua/g2Ep1RHjKGVzIax+JsZXqBJHrWknNnEOBXkJjK9VRIovWSJVT7KwLpTqht5dI71zoQaXKqZe/VMWm4LOnVDn3EBpXqc4aXugBpcpZ8F0EpXbQsEIPKFXOnYTGVaqzBhZ6QC2nUu1T8FP4tJxKtY+WUylHhaaclULjKtVZoXnOqZcmUcWmd6EHLHg5vUQ6ii5YpIpPwScUiZmzTGBMpXZUwcspURQtZzf5euTVt+6MTRI5SbvU5TDr4LOCjilRFOmLipWsG2MP9IgYW/ADF2EQwa4r/JiFVy8wZskbH3n97b5m0/7SOUpYQ6EHLHg5M6l4A/4iMaoL3Rh7QI+Ad6/GQg8otYtZKzRuSTohMmvOAFNzkHSOElf6M2dgvdC4JemW2G+bpTOEQMGfjkmVc53QuCVnXOSdd6vM+kOkc4TAkkIPKFXONULjlpxfxSbrAbbCyBR6QKlyFvyvUCk6wsyfP9isE1suIGQ+LvSAUuX8VGjcknJ7+X010hlCJDTl/ERo3JIx2ix8f6hZc7h0jhDJFHpAnTmL1B2xSYU9lyzcLFpO1R4HmEUL9zArj5DOESLLSWZD81LKIkBfm+ukO2KTVhmjb7sroII/3wShcmZS8TrgA4mxi91I8+nHe5lqXT6xsMJTzsBswbGL1l2xe5YZo+/sKbCMxKCS/5PnCI5dlPYyyz4ZYZborFl4CyUG1ZmziNwVu+dTY/T6SwJmSAwqWc5/C45ddPYwK5bsZzI6axZehmR2kcTAYuXMpOIrgY+kxi82
d8YmfWwMMekcIfSc1MDSBxbEHngxGcqq6oPNh/q6pgwtp2rbr8vvXWgM5dI5Qiq05XxeeHznDWbtyjHmfT2HVsYCktlqqcFFy5lJxVcA8yUzuO622OQFxtBDOkdIie7ZSc+cAP+QDuCqQWRXHx2ZO0Y6R4iFvpz/Jx3AVbfG7p9rjC76JCQHvCAZwIVyzgBWSIdwzQDWrz0+MluvciBnNsnsWskA4uXMpOI5dPbcSir2u3eMKfzKVupzf5MOIF7OwF+lA7ikD7XZkyNvjZbOEWLNwIPSIVwp5wvAKukQrrgx9uBsY+gnnSPEniaZFb8InRPlzKTizcAfpXO4oBebNpwSeU2v3i7rN9IBwJFyBiZLB3DBL2JT3o4Y+kvnCLElwFPSIcChcmZS8feAl6RzSKqkrnZC5GVdKUzWAySzTlxCx5lyBu6XDiDpv8v+8FbE2EHSOUKsGXhAOsRmrpXzr8Bq6RASelC/6azo86Okc4TcMySzi6VDbOZUOYO1O514Ml5o15Y9+kbU2J2lc4ScU797TpUzcAewUTpEIZXTWH9u9B8jpHOE3FIgLR0in3PlzKTiq4DfSucopGvKHns9anK7SucIucmuHAjazLlyBm5FYCVhCWU0NV4UfXq4dI6QW4W/x+YUJ8uZScWXAlOkcxTCD8oef73M5IZK5wi5m0hmnVuxrUw6wDbcDJwPpXt5jijNTZdFn9hDMkNdk+WYh2qpb4amHHxzVBk3HN+DHz9bxxMfNFEeheEDIzx0Wk/699hyBYjF2RznTtvE8hpLxMClh8S48sgKAK79Rx1Pf9jEwbtEmTqhJwC/n9PA2k32859xxKfAfdIhWuPkzAmQScUzwN3SObrTZdHpr8dMs2g5K6Lw3Hm9mDOxN7P/qxfPfNTEzCVNnDS8jLmX9+Kdy3ozYmCEm2dsvY5PWQRuO7kHC67ozcyLejHpzUbmr2omW2d5dUkz71zWm2ZreXdFM5saLVPmNHL5GOf+1t4gsUhRezhbzsD/UKInxBtyuR+U/d8Q8RzG0LvcnxEbc9DYDAY4eXgZZRH/+0fuFmXJhtxW9921T4RDdvWvcd2nwjBqpwhL1/uzaEOzxVrLpkaIReHWVxv4weHlxKJOrb80F3hYOkRbnC5nJhXPAj+XztEdLo4+PbPcNO0pnQOgOWc5eHINO9+6gZP2KuOI3bZ8tvPg7Ea+tve2nwFl1uX4d3UzR+wWpU+F4YxRMUbfX8ue/SP0qzC8uayZ0/Z17rK7V7p2hDafsdZKZ9gmL5GO4i/dUELnnFr7XsX5i3qYRqeO0q6rs0z480bu/loP9t/ZnxFvfKmet6qbefzMnhjT+qxX02A5dkot14+r4PRRWxfw4umbuGJMObOqm3n2oyYOHBzlv48Rf975OMnsGdIhtsXpmRM+fzvZD6VzdKVzo8/OdK2YAP17GI4bVsYzHzYB8PDsBp5c2MQjp7ddzMZmyxmPbeTsA2KtFvPf1f7ENGJQhKlzGnnsW5XMXdnMwjWiE1YdcI1kgPZwvpwAmVT8n8BU6Rxdw9pE2Z8GSqfYbFVtjnV1/t7TpkbLPz9uYt+qCM982MQtrzQw/ds9qYy1XkxrLRdNr2NUVZSrx7Y+E/70+Xp+cXwFjTloDnbSIgY2NnbLw2mvW0lmM6IJ2sHll1Ja+iHwVaCozz/9dvT5NypNgzNLK1TXWM6btpHmHOQsnLlfjFNGxNj7rg3UN8NJv/fPpDxytyiTT+nJsg05Lp5ex1NnV/LK4mZ+/04jB+wc4eDJ/suEN51Qwfh9/Bl02nuNjBkSZUgffw4Yu1uUA+6r4cDBEQ7aRWyxtLn4L9M5z/nnnPm8RPoMivx6Q/MqLlzQy9Tpu09kbATGkMwWxYXMi2K3drNMKv6/wJ+kc3TWhMiMt7SYoq4slmJCkZUz8D38S0kUnRtiU3RZBTmPksz+TjpERxRdOTOp+FrgTED2kEIHxSMz3+5rNpXQy0FFZRHwX9IhOqroygmQScVf
AxLSOTrixtgDuly8jEbg2ySz66WDdFRRlhMgk4rfTpFcKf6EyKzZ/U2tXu5SxnUks29Kh+iMoi1n4AKKYOn6W2K/LZ5D4qXlaeA26RCdVdTlDM69PQ3ISmdpy7jIO+9WmfW6tELhVQPnkcwW7R/Goi4nQCYVnwecjqMHiH4VmxyKKzo4Zh0wnmS2qN/RVPTlBMik4s8BF0vnaOkIM3/+YLPuUOkcIVODX8zZ0kF2VEmUEyCTik8FktI58t1efp9zl74ocXXAaSSzr0kH6QolU06ATCp+A45cNX60Wfj+ULPmcOkcIdIIfItkVnSp+K5UUuUMXAaInwlyZ2zSZ9IZQiQHnEMy+6R0kK5UcuXMpOIWuBTBNS8OMIsW7m5WOvPOkxJngYtJZh+TDtLVSq6c8HlBL0FodeI7Y5NWGYNTF8spYVeSzD4kHaI7lGQ5YYuCFvQ56L7m00V7muojCzlmiF1PMluyV2gsqvdzdpaXSF8P/LIQY/29/CevjIwsOboQY4VYA3AZyazInlGhlOzMmS+Tit8InEc3n6gw3Cz9ZIRZorNm91oBHF/qxYSQlBM+fx10PNBt7064K3bPYmPQd590n1nAYSSzr0oHKYTQlBM+v1DY0cDCrt72MLN8yZfMJzprdp8/AuNIZovyjfadEapyAmRS8bnAYcDjXbndO2KTFhlTVBdMKxY54FqS2bNJZjdJhymkUBwQaouXSF8N3MIOXoVwKKuqX664cpAxpbvokpAs8F2S2aekg0gI3cyZL3jD9nH4qxp32q/L7/1Ai9nl3geODGsxIeTlBMik4q/gL/UwpTP334W1K8aY9/VsoK6TA24HDiGZfU86jKRQ79a25CXSceA3QLtX/3okduOLR0fnHdt9qUJlPnARyexM6SAuCP3MmS+TiqeB/WjnsnCDyK4+KjJvTPemCoVG/JNERmsxv6AzZxu8RPo44A6gzQtzPRS75cXjo3N01twx/wK+TzK7QDqIa7Sc2+Al0hH883N/CVTl3zaA9WvfrphYbgy9RcIVv8XA1SSzRb28RnfScraDl0j3A34GfB+IAdwfu+3Fr0Rn6azZcbXAncBNJLO10mFcpuXsAC+RHgZc34+aCbMrLi03hr7SmYrISuAu4D6S2bXtvZMx5kHgFGCltTZUV8zXcnbCkz89eY9Toq//GLgQqJTO47gP8K8dO5Vktq6jdzbGHIN/0a6pWk7Vfsl+Vfi7ut8DnFkQ1xEzgVuBaSSzuR3ZkDHGA57UcqqOS/brhT+Lnot/3m5YWeBJ/JWjZ3TVRrWcqmsk+w0Hzgo+DhROUyjzgGnAI93xkoiWU3W9ZL99+aKopbRorgVewy/kNJLZLn8LXj4tp+peyX4H8kVRhwun6YwG4Dn8ld2mk8wuL9TAWk5VOMl++wBjgo/DgENw76hvPbAAmA38HXhKYo1LY8yf8N85VIV/iZKfW2vFLntaSFpOFyT7RfF3ezeXdQz+89WKAiVYArzT4uN9ktmmAo2vWqHldFWyXzn+W9k8YBD+zNHWv/1hq+vkWvzZbyOwNvhYk/fvR2wuYgdOClCFo+UsBf7MOwD/XUZ1QD3JbL1sKLWjtJxKOUrfz6mUo7ScSjlKy6mUo7ScSjlKy6mUo7ScSjlKy6mUo7ScSjlKy6mUo7ScSjlKy6mUo7ScSjlKy6mUo7ScSjlKy6mUo7ScSjlKy6mUo7ScSjlKy6mUo7ScSjlKy6mUo7ScSjlKy6mUo7ScSjlKy6mUo7ScSjlKy6mUo/4/OMXWX7D9Wd8AAAAASUVORK5CYII=\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Frequency of each distinct value in the affairs column, printed as a dict and drawn as a pie chart.\n",
    "affairs_distribution = df['affairs'].value_counts()\n",
    "print('affairs的分布：', dict(affairs_distribution))\n",
    "# autopct labels every wedge with its percentage share, formatted to one decimal place.\n",
    "plt.pie(affairs_distribution, labels=affairs_distribution.index, autopct='%1.1f%%')\n",
    "plt.title('Affairs Distribution')\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 46,
   "id": "c90ecf92-c542-43b8-a515-bc98e48ba0c8",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "yrs_married： {2.5: 2034, 6.0: 1141, 16.5: 818, 23.0: 811, 9.0: 602, 13.0: 590, 0.5: 370}\n"
     ]
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYsAAAEjCAYAAADDry0IAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/YYfK9AAAACXBIWXMAAAsTAAALEwEAmpwYAAAhZUlEQVR4nO3deZwlVX338c9XdmVVWpZZGJZBBRJHGZFINPigkUUDmgjDE1kUMy4g8mjyOAiJxDgGFUxQI2RUgiyCCCgom0hcEYQBWYZNBhhgmBEGUFYlzvDNH3UaivZ2122m79L09/163VfXPVV16te3b9/frXNO1ZFtIiIiRvKCXgcQERH9L8kiIiIaJVlERESjJIuIiGiUZBEREY2SLCIiolGSRXSEpBMk/eMY1TVV0mOSVinPfyTpvWNRd6nvQkkHjFV9ozjupyQ9IOnX3T52r6zMaz3Wf/cYnVV7HUCMP5IWARsBy4EVwE3AycA8208B2H7/KOp6r+0fDLeN7buBtVcu6qePdxSwle131erfbSzqHmUcU4CPApvZvr/bx++VXrzWMTZyZhHP1dtsrwNsBhwNfAz42lgfRNLz9QvNZsCDY5Eo+vE1GhqTKvm8Gcfyx4uVYvth2+cB+wAHSNoOQNJJkj5VljeU9D1Jv5X0kKSfSnqBpFOAqcB3SzPT/5c0TZIlHSTpbuC/a2X1D6AtJV0p6WFJ50p6cTnWzpIW12OUtEjSmyTtCnwc2Kcc77qy/unmjRLXkZLuknS/pJMlrVfWDcZxgKS7SxPSEcO9NpLWK/svK/UdWep/E3AJsGmJ46QW+y6Q9Lba89XK8WYM8xqtKelUSQ+W1/kqSRuN9Lcrv/enJP28xPFdSS+RdJqkR0od02rbHyfpnrLuakmvr607StJZJYZHgANL/XMlXQY8AWwxtClJ0nsk3SzpN5IulrRZbd2bJd1S/sZfAjTS7xOdlWQRY8L2lcBi4PUtVn+0rBugar76eLWL9wPupjpLWdv2Z2v7/AXwCuAtwxxyf+A9wKZUzWFfaCPGi4BPA98sx3tli80OLI83AltQNX99acg2fw68DNgF+CdJrxjmkF8E1iv1/EWJ+d2lyW03YEmJ48AW+54MvKv2fHdgqe1ra2X11+iAcqwpwEuA9wO/GyauulnAfsAkYEvgcuC/gBcDNwOfqG17FTCjrPsG8C1Ja9bW7wmcBawPnFbK9gNmA+sAd9UPLGkvqvfCO6jeGz8FTi/rNgTOBo4ENgRuB3Zq4/eJDkmyiLG0hOqDZKg/AJtQtc//wfZP3XxTsqNsP257uA+8U2wvsP048I/A3iod4Cvpb4HP277D9mPA4cCsIWc1/2z7d7avA64D/ijplFj2AQ63/ajtRcCxVB+e7TgV2F3SuuX5fsApQ7apv0Z/oEoSW9leYftq24+0cZz/sn277YeBC4Hbbf/A9nLgW8CrBje0fartB20vt30ssAZV0hx0ue3v2H6q9nc7yfaNZZ8/DDn2+4B/tX1zOd6ngRnl7GJ34CbbZ5X9/h2YMAMB+lGSRYylScBDLco/BywEvi/pDklz2qjrnlGsvwtYjeob6MralGd/A76LaiBIvUmn/qH1BK073zcEVm9R16R2grC9BLgM+GtJ61OdiZw2ZLP6a3AKcDFwhqQlkj4rabU2DnVfbfl3LZ4//btJ+mhpMnpY0m+pzmTqr3mrv9lIf8fNgONKs9lvqd47onqNNq3vW75cNL0nooOSLGJMSHoN1T/5z4auK9+sP2p7C+BtwEck7TK4epgqm848ptSWp1J9s34AeBx4YS2uVaiaONqtdwnVh1i97uU8+0O0HQ+UmIbWde8o6vg6VVPUO6m+tQ/d9+nfpZyx/bPtbYDXAW+lavYaE6V/4mPA3sAGttcHHubZ/QitXtuRXu97gPfZXr/2WMv2z4Gl1P7GksSz/+bRZUkWsVIkrSvprcAZwKm2b2ixzVslbVX+4R+hGm67
oqy+j6pNf7TeJWkbSS8EPgmcZXsF8CtgTUl7lG/WR1I1lwy6D5im4UfmnA78P0mbS1qbZ/o4lo8muBLLmcBcSeuUppWPUDUvtes7wKuBD1P1YQxL0hsl/UlJjo9QJaoVI+0zSutQJc1lwKqS/glYd+RdGp0AHC5pW3h6QMA7y7rzgW0lvaM0AR4KbLySx4uVkGQRz9V3JT1K9e3wCODzwLuH2XY68APgMaoO1C/b/lFZ96/AkaUp4u9HcfxTgJOomoTWpPowobS9fxD4KtW3+MepOtcHfav8fFDSNS3qPbHU/RPgTuD3wIdGEVfdh8rx76A64/pGqb8tpd3/bGBz4JyGzTem6lx+hKpj+seMLjE1uZiqT+NXVM1pv2clm4Vsfxv4DFXT2SPAAqrmNmw/QHVGdTTwINV76LKVOV6sHGXyo4j+Vb7Bb12/iDCiF/ruYp6IqKi6duQg2h9BFdExaYaK6EOS/o6qmedC2z9ZiXoeG+bR6nqYiGGlGSoiIhrlzCIiIholWURERKPnbQf3hhtu6GnTpvU6jIiIceXqq69+wPbA0PLnbbKYNm0a8+fP73UYERHjiqS7WpWnGSoiIholWURERKMki4iIaJRkERERjZIsIiKiUZJFREQ0SrKIiIhGSRYREdGoYxflSZpCNbvXxsBTwDzbx5XbLn8TmAYsAva2/Zuyz+FUt2ReARxq++JSvj3VRDdrARcAH/YY3wFx2pzzx7K6P7Lo6D06Wn9ERCd18sxiOfBR268AdgQOlrQNMAe41PZ04NLynLJuFrAtsCvw5TJFJMDxwGyq2bKml/UREdElHUsWtpfavqYsP0o11eMkYE+qiegpP/cqy3sCZ9h+0vadwEJgB0mbAOvavrycTZxc2yciIrqgK30WkqYBrwJ+AWxkeylUCQV4adlsEs+e03dxKZvEs+dQHiyPiIgu6XiykLQ21aTzh9l+ZKRNW5R5hPJWx5otab6k+cuWLRt9sBER0VJHk4Wk1agSxWm2zynF95WmJcrP+0v5YmBKbffJwJJSPrlF+R+xPc/2TNszBwb+6A67ERHxHHUsWUgS8DXgZtufr606DzigLB8AnFsrnyVpDUmbU3VkX1maqh6VtGOpc//aPhER0QWdnM9iJ2A/4AZJ15ayjwNHA2dKOgi4G3gngO0bJZ0J3EQ1kupg2yvKfh/gmaGzF5ZHRER0SceShe2f0bq/AWCXYfaZC8xtUT4f2G7soouIiNHIFdwREdEoySIiIholWURERKMki4iIaJRkERERjZIsIiKiUZJFREQ0SrKIiIhGSRYREdEoySIiIholWURERKMki4iIaJRkERERjZIsIiKiUZJFREQ0SrKIiIhGnZxW9URJ90taUCv7pqRry2PR4Ax6kqZJ+l1t3Qm1fbaXdIOkhZK+UKZWjYiILurktKonAV8CTh4ssL3P4LKkY4GHa9vfbntGi3qOB2YDVwAXALuSaVUjIrqqY2cWtn8CPNRqXTk72Bs4faQ6JG0CrGv7ctumSjx7jXGoERHRoFd9Fq8H7rN9W61sc0m/lPRjSa8vZZOAxbVtFpeyiIjook42Q41kX559VrEUmGr7QUnbA9+RtC3Qqn/Cw1UqaTZVkxVTp04dw3AjIia2rp9ZSFoVeAfwzcEy20/afrAsXw3cDmxNdSYxubb7ZGDJcHXbnmd7pu2ZAwMDnQg/ImJC6kUz1JuAW2w/3bwkaUDSKmV5C2A6cIftpcCjknYs/Rz7A+f2IOaIiAmtk0NnTwcuB14mabGkg8qqWfxxx/YbgOslXQecBbzf9mDn+AeArwILqc44MhIqIqLLOtZnYXvfYcoPbFF2NnD2MNvPB7Yb0+AiImJUcgV3REQ0SrKIiIhGSRYREdEoySIiIholWURERKMki4iIaJRkERERjZIsIiKiUZJFREQ0SrKIiIhGSRYREdEoySIiIholWURERKMki4iIaJRkERERjZIsIiKiUZJFREQ06uS0qidKul/SglrZ
UZLulXRteexeW3e4pIWSbpX0llr59pJuKOu+UObijoiILurkmcVJwK4tyv/N9ozyuABA0jZUc3NvW/b5sqRVyvbHA7OB6eXRqs6IiOigjiUL2z8BHmpz8z2BM2w/aftOYCGwg6RNgHVtX27bwMnAXh0JOCIihtWLPotDJF1fmqk2KGWTgHtq2ywuZZPK8tDyiIjoom4ni+OBLYEZwFLg2FLeqh/CI5S3JGm2pPmS5i9btmwlQ42IiEFdTRa277O9wvZTwFeAHcqqxcCU2qaTgSWlfHKL8uHqn2d7pu2ZAwMDYxt8RMQE1tVkUfogBr0dGBwpdR4wS9Iakjan6si+0vZS4FFJO5ZRUPsD53Yz5oiIgFU7VbGk04GdgQ0lLQY+AewsaQZVU9Ii4H0Atm+UdCZwE7AcONj2ilLVB6hGVq0FXFgeERHRRR1LFrb3bVH8tRG2nwvMbVE+H9huDEOLiIhRyhXcERHRKMkiIiIaJVlERESjJIuIiGiUZBEREY2SLCIiolGSRURENEqyiIiIRkkWERHRKMkiIiIaJVlERESjJIuIiGiUZBEREY2SLCIiolGSRURENEqyiIiIRh1LFpJOlHS/pAW1ss9JukXS9ZK+LWn9Uj5N0u8kXVseJ9T22V7SDZIWSvpCmV41IiK6qJNnFicBuw4puwTYzvafAr8CDq+tu932jPJ4f638eGA21bzc01vUGRERHdaxZGH7J8BDQ8q+b3t5eXoFMHmkOiRtAqxr+3LbBk4G9upAuBERMYJe9lm8B7iw9nxzSb+U9GNJry9lk4DFtW0Wl7KIiOiiVXtxUElHAMuB00rRUmCq7QclbQ98R9K2QKv+CY9Q72yqJiumTp06tkFHRExgXT+zkHQA8Fbgb0vTEraftP1gWb4auB3YmupMot5UNRlYMlzdtufZnml75sDAQKd+hYiICaeryULSrsDHgL+y/UStfEDSKmV5C6qO7DtsLwUelbRjGQW1P3BuN2OOiIgONkNJOh3YGdhQ0mLgE1Sjn9YALikjYK8oI5/eAHxS0nJgBfB+24Od4x+gGlm1FlUfR72fIyIiuqBjycL2vi2KvzbMtmcDZw+zbj6w3RiGFhERo5QruCMiolFbyULSTu2URUTE81O7ZxZfbLMsIiKeh0bss5D0Z8DrgAFJH6mtWhdYpZOBRURE/2jq4F4dWLtst06t/BHgbzoVVERE9JcRk4XtHwM/lnSS7bu6FFNERPSZdofOriFpHjCtvo/t/9OJoGJ0ps05v6P1Lzp6j47WHxH9r91k8S3gBOCrVBfNRUTEBNJuslhu+/iORhIREX2r3aGz35X0QUmbSHrx4KOjkUVERN9o98zigPLzH2plBrYY23AiIqIftZUsbG/e6UAiIqJ/tZUsJO3fqtz2yWMbTkRE9KN2m6FeU1teE9gFuIZqTuyIiHiea7cZ6kP155LWA07pSEQREdF3nustyp+gms0uIiImgHb7LL5LNfoJqhsIvgI4s1NBRUREf2m3z+KY2vJy4C7bi0faQdKJwFuB+21vV8peDHyT6rYhi4C9bf+mrDscOIjqCvFDbV9cyrfnmWlVLwA+bNtERETXtNUMVW4oeAvVnWc3AP6njd1OAnYdUjYHuNT2dODS8hxJ2wCzgG3LPl+WNHgL9OOB2VTNXtNb1BkRER3W7kx5ewNXAu8E9gZ+IWnEW5Tb/gnw0JDiPYGvl+WvA3vVys+w/aTtO4GFwA6SNgHWtX15OZs4ubZPRER0SbvNUEcAr7F9P4CkAeAHwFmjPN5GtpcC2F4q6aWlfBJwRW27xaXsD2V5aHlERHRRu6OhXjCYKIoHR7FvO9SizCOUt65Emi1pvqT5y5YtG7PgIiImunY/8C+SdLGkAyUdCJxP1dk8WveVpiXKz8EEtBiYUttuMrCklE9uUd6S7Xm2Z9qeOTAw8BzCi4iIVkZMFpK2krST7X8A/hP4U+CVwOXAvOdwvPN45qaEBwDn1spnSVpD0uZUHdlX
liarRyXtKEnA/rV9IiKiS5r6LP4d+DiA7XOAcwAkzSzr3jbcjpJOB3YGNpS0GPgEcDRwpqSDgLupOsyxfaOkM4GbqIbmHmx7cJKlD/DM0NkLyyMiIrqoKVlMs3390ELb8yVNG2lH2/sOs2qXYbafC8xtdSxgu4Y4IyKig5r6LNYcYd1aYxlIRET0r6ZkcZWkvxtaWJqRru5MSBER0W+amqEOA74t6W95JjnMBFYH3t7BuCIioo+MmCxs3we8TtIbeabf4Hzb/93xyCIiom+0O5/FD4EfdjiWiIjoU2N5FXZERDxPJVlERESjJIuIiGiUZBEREY2SLCIiolGSRURENEqyiIiIRkkWERHRqN1pVSM6Ztqc8zta/6Kj9+ho/RETQZJFxEpKsouJIM1QERHRqOvJQtLLJF1bezwi6TBJR0m6t1a+e22fwyUtlHSrpLd0O+aIiImu681Qtm8FZgBIWgW4F/g28G7g32wfU99e0jbALGBbYFPgB5K2rk27GhERHdbrZqhdgNtt3zXCNnsCZ9h+0vadwEJgh65EFxERQO+TxSzg9NrzQyRdL+lESRuUsknAPbVtFpeyiIjokp4lC0mrA38FfKsUHQ9sSdVEtRQ4dnDTFrt7mDpnS5ovaf6yZcvGNuCIiAmsl2cWuwHXlNn4sH2f7RW2nwK+wjNNTYuBKbX9JgNLWlVoe57tmbZnDgwMdDD0iIiJpZfJYl9qTVCSNqmtezuwoCyfB8yStIakzYHpwJVdizIiInpzUZ6kFwJvBt5XK/6spBlUTUyLBtfZvlHSmcBNwHLg4IyEiojorp4kC9tPAC8ZUrbfCNvPBeZ2Oq6IiGit16OhIiJiHEiyiIiIRkkWERHRKMkiIiIaJVlERESjJIuIiGiUZBEREY2SLCIiolGSRURENEqyiIiIRj253UdE9I9pc87vaP2Ljt6jo/VHdyRZRMS4lmTXHWmGioiIRkkWERHRKMkiIiIaJVlERESjJIuIiGjUk2QhaZGkGyRdK2l+KXuxpEsk3VZ+blDb/nBJCyXdKuktvYg5ImIi6+WZxRttz7A9szyfA1xqezpwaXmOpG2AWcC2wK7AlyWt0ouAIyImqn5qhtoT+HpZ/jqwV638DNtP2r4TWAjs0P3wIiImrl4lCwPfl3S1pNmlbCPbSwHKz5eW8knAPbV9F5eyiIjokl5dwb2T7SWSXgpcIumWEbZVizK33LBKPLMBpk6duvJRRkQE0KMzC9tLys/7gW9TNSvdJ2kTgPLz/rL5YmBKbffJwJJh6p1ne6btmQMDA50KPyJiwul6spD0IknrDC4DfwksAM4DDiibHQCcW5bPA2ZJWkPS5sB04MruRh0RMbH1ohlqI+DbkgaP/w3bF0m6CjhT0kHA3cA7AWzfKOlM4CZgOXCw7RU9iDsiYsLqerKwfQfwyhblDwK7DLPPXGBuh0OLiIhh9NPQ2YiI6FNJFhER0SjJIiIiGiVZREREoySLiIholGQRERGNkiwiIqJRkkVERDTq1Y0EIyICmDbn/I7Wv+joPcaknpxZREREoySLiIholGQRERGNkiwiIqJRkkVERDRKsoiIiEZJFhER0agX06pOkfRDSTdLulHSh0v5UZLulXRteexe2+dwSQsl3SrpLd2OOSJiouvFRXnLgY/avqbMxX21pEvKun+zfUx9Y0nbALOAbYFNgR9I2jpTq0ZEdE/XzyxsL7V9TVl+FLgZmDTCLnsCZ9h+0vadwEJgh85HGhERg3raZyFpGvAq4Bel6BBJ10s6UdIGpWwScE9tt8WMnFwiImKM9SxZSFobOBs4zPYjwPHAlsAMYClw7OCmLXb3MHXOljRf0vxly5aNfdARERNUT5KFpNWoEsVpts8BsH2f7RW2nwK+wjNNTYuBKbXdJwNLWtVre57tmbZnDgwMdO4XiIiYYHoxGkrA14CbbX++Vr5JbbO3AwvK8nnALElrSNocmA5c2a14IyKiN6OhdgL2A26QdG0p+ziw
r6QZVE1Mi4D3Adi+UdKZwE1UI6kOzkioiIju6nqysP0zWvdDXDDCPnOBuR0LKiIiRpQruCMiolGSRURENEqyiIiIRkkWERHRKMkiIiIaJVlERESjJIuIiGiUZBEREY2SLCIiolGSRURENEqyiIiIRkkWERHRKMkiIiIaJVlERESjJIuIiGiUZBEREY2SLCIiotG4SRaSdpV0q6SFkub0Op6IiIlkXCQLSasA/wHsBmxDNV/3Nr2NKiJi4hgXyQLYAVho+w7b/wOcAezZ45giIiYM2e51DI0k/Q2wq+33luf7Aa+1fciQ7WYDs8vTlwG3djCsDYEHOlh/J43n2CHx91ri761Ox7+Z7YGhhat28IBjSS3K/ijL2Z4HzOt8OCBpvu2Z3TjWWBvPsUPi77XE31u9in+8NEMtBqbUnk8GlvQoloiICWe8JIurgOmSNpe0OjALOK/HMUVETBjjohnK9nJJhwAXA6sAJ9q+scdhdaW5q0PGc+yQ+Hst8fdWT+IfFx3cERHRW+OlGSoiInooySIiIholWURERKMki4iIaDQuRkP1G0kvtv1Qr+MYDUkbAZOoLmZcYvu+Hoc0YUh6OdXtaZ5+/YHzbN/c08BGIe+f/tDLz56Mhmog6UjbnyrL2wDfAVajuqp8H9u/6GF4jSTNAE4A1gPuLcWTgd8CH7R9TW8ie24k/TnVvcIW2P5+r+NpIuljwL5U9zNbXIonU10rdIbto3sVWzueD++f8Zqs++6zx3YeIzyAa2rL5wO7leUdgJ/3Or424r+W6j5aQ8t3BK7rdXxtxH9lbfnvyu/zCeAyYE6v42sj/l8Bq7UoXx24rdfxtRH/eH//fKz8DnOAd5XHnMGyXsfXEHtfffakGWp0NrV9IYDtKyWt1euA2vAit/gGYvsKSS/qRUCjtFpteTbwZtvLJB0DXAH09Tdz4ClgU+CuIeWblHX9bry/fw4CtrX9h3qhpM8DN9L/759BPf/sSbJotoWk86hO/SZLeqHtJ8q61UbYr19cKOl84GTgnlI2BdgfuKhnUbXvBZI2oBqMIdvLAGw/Lml5b0Nry2HApZJu45nXfyqwFXDIcDv1kfH+/hnPybqvPnuSLJoNnTfjBfB0h9/x3Q9ndGwfKmk3nmmzFVXb+X/YvqCnwbVnPeBqqrgtaWPbv5a0Nq3vRtxXbF8kaWuqpoP663+V7RU9Da4Nz4P3z2GM32TdV5896eCOcUnSC4GNbN/Z61iiv0l6AeM0WfeTXGexEspkS+PWeI7f9hPjPVFI+l6vY1gZ4+X9Y/sp21fYPtv2WWV5XCeKXrz2SRYrp++bQRqM6/jH+4ct1eiu8Szvn97p+mufZqg2lHHak4Bf2H6sVr6r7b7u5JP0WuBm24+UERRzgFcDNwGftv1wTwNcCZI2sb2013E830naEng7Vcf2cuA24PTx/N6B8fH+6adrRHJm0UDSocC5wIeABZLqnU6f7k1Uo3IiMDiC4jiqDuPPlLL/6lVQK0PSSwD6/R8dQNLGko6X9B+SXiLpKEk3SDpT0ia9jq9Jef+fAKwJvAZYiyppXC5p595FtvL6/f1TLug8g+os4kqqSeAEnC5pTtfjyZnFyCTdAPyZ7cckTQPOAk6xfZykX9p+VW8jHJmkm22/oixfY/vVtXXX2p7Rs+DaIOlo4BjbD0iaCZxJNeRxNWB/2z/uaYANJF1EdUHVi4D/C5wGnE71bfFNtoeOeOkr5f0/w/aKMqjgAts7S5oKnDsO3v8bU13E+RTwT1Rf+v4auBn4cD8nDEm/ovU1IqsDN9qe3s14cmbRbJXBpifbi4Cdgd3KRT3joc12gaR3l+XrygcuZTjnH4bfrW/sYfuBsvw5qtscbAW8GTi2d2G1bSPbX3R1W4/1bX/G9t22vwhs1uvg2jQ4xH4NYB0A23czPq4zOomqyfUe4IfA74A9gJ9SnTH1s8FrRIbqyTUiuc6i2a8lzbB9LUA5w3grVfPOn/Q0sva8FzhO0pHA
A1TNB/dQ/fO8t6eRtWc1SavaXg6sZfsqANu/krRGj2NrR/0L2clD1q3SzUCeo68CV0m6AngDVRMmkgaA8XAzzY1KYkbSB21/ppR/UdJBPYyrHYfRR9eIpBmqgaTJwHLbv26xbifbl/UgrFGTtA6wBdUXhMUeJ3cNlfQh4G1Ut2V4A7A+cA6wC7CF7f16F10zSZ8EPlsfGFHKtwKOtv03vYmsfZK2BV5BdfPGW3odz2hIus72K8vyp2wfWVt3g+2+/sLXT9eIJFlE3ysdqR8AtqZKdvdQ3YHzxHLG0dfG82i68e75kKz7RZJFjFuS3m27r0d0lTOjQ6g6VGdQdaqeW9Y9a8BBdNd4eP/0kySLGLck3W17aq/jGMl4H033fDYe3j/9JB3c0dckXT/cKmCjbsbyHD1rNF1pUjtL0maMj9F049rz4P3TN5Isot9tBLwF+M2QcgE/7344ozbeR9ONd+P9/dM3kiyi330PWHvww7ZO0o+6Hs3o7U91i4ynlU75/SX9Z29CmlDG+/unb6TPIiIiGuUK7oiIaJRkERERjZIsYsJQ5WdlmtDBsr3Lzf46edyTJD1RrqIfLDtOkiVtuJJ1f1XSNqPYfudxPo9D9EiSRUwYrjro3g98XtKakl4EzAUOfi71SRrNvZ0WUuZULrdweCNw78ocT9Iqtt9r+6bR1BPxXCRZxIRiewHwXeBjVLeuPhU4QtJVkn45OF+JpGmSfirpmvJ4XSnfWdIPJX0DuEHSiySdL+k6SQsk7TPMoU8HBtftDFxGbZSUpO9IulrSjapNmSnpMUmflPQL4M9aPP9R7U7Cfynp8hLvtyStXcp3lXSLpJ8B7xijlzImmCSLmIj+mWpuid2oJvX5b9uvofq2/7lyxnE/8OZyO459gC/U9t8BOML2NsCuwBLbr7S9HTBck9ZtwICkDYB9qSa1qXuP7e2BmcChKhM8Uc2DscD2a23/rMVzAEpz1pFUc2S8GpgPfETSmsBXqG7G+Hpg49G9VBGVXGcRE47txyV9E3gM2Bt4m6S/L6vXpLoN9BLgS5JmACuobmI46Erbd5blG4BjJH0G+J7tn45w6HOAWcBrgfcNWXeopLeX5SnAdODBcuyza9sNfT5oR2Ab4DJJAKsDlwMvB+60fRuApFOB2S32jxhRkkVMVE+Vh4C/tn1rfaWko4D7gFdSnYH/vrb68cGFMq/G9sDuwL9K+r7tTw5zzDOAa4Cv236qfKgP3lX3TVT3kHqiXCy2Ztnn90NuRz30+dMhA5fY3nfI7zGDau7miJWSZqiY6C4GPqTyyS1p8MZ+6wFLbT8F7McwExVJ2hR4wvapwDHAsHeRLbPLHQF8eciq9YDflETxcqqzhNG6Atip3HobSS9UNRviLcDmkrYs2+07XAURI0myiInuX6imB71e0oLyHKoP9ANUzRC3NbWziSH+BLhS0rVUieBTIx3M9n/avn1I8UXAquWmd/9C9cE/KraXAQcCp5d6rgBebvv3VM1O55cO7rtGW3cE5HYfERHRhpxZREREoySLiIholGQRERGNkiwiIqJRkkVERDRKsoiIiEZJFhER0SjJIiIiGv0vS5NCCDtTik8AAAAASUVORK5CYII=\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Count how many rows fall on each 'yrs_married' value and echo the tallies as a plain dict.\n",
    "yrs_married_counts = df['yrs_married'].value_counts()\n",
    "print('yrs_married：', dict(yrs_married_counts))\n",
    "\n",
    "# Draw the tallies as a bar chart and label it through the Axes object the plot call returns.\n",
    "ax = yrs_married_counts.plot.bar()\n",
    "ax.set(title='Distribution of yrs_married', xlabel='Years Married', ylabel='Count')\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 48,
   "id": "3509a6c2-f89f-43f3-b049-a7c91e835ed8",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAX8AAAEGCAYAAACNaZVuAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/YYfK9AAAACXBIWXMAAAsTAAALEwEAmpwYAAAVF0lEQVR4nO3dfZBddX3H8c+HJMhTBNIsSaqJCxIpEWPEWzTGqSAIaKJBWzrSUbBjDc7IoFhrg1qaYtXoVHHaKgWECgVtaXkIEiTQGLQ8qbsYQiCVgCQgJpukCEZAJcm3f9yzYXfZu3vucs89uef3fs3cuff89j58v3PmfnJyfuee44gQACAte5VdAACg/Qh/AEgQ4Q8ACSL8ASBBhD8AJGh82QXkNXny5Oju7i67DADoKL29vdsiomvoeMeEf3d3t3p6esouAwA6iu2Nw42z2wcAEkT4A0CCCH8ASBDhDwAJIvwBIEEdc7QPAOlz31mrG9du1oKjpurT7zyq7HLapnvx8t2PNyydX2Il7VVk32z5Ax3isMXLdckdG7Xpqd/qkjs26rABwVBl3UP6HLpcVUX3TfgDHeBz31mrXUPGdmXjVdYo8Kr+D0A7+ib8gQ5w49rNTY0DoyH8gQ6w4KipTY0DoyH8gQ7w6Xce9YIv617ZeJU1muSs+qRvO/om/IEO8bOl8/Whea/QtANfog/Ne4V+VvEA7Dc08Koe/P2K7tudcg3fWq0WnNgNAJpjuzciakPH2fIHgAQR/gCQIMIfABJE+ANAggh/AEgQ4Q8ACSL8ASBBhD8AJIjwB4AEEf4AkCDCHwASRPgDQIIIfwBIEOEPAAki/AEgQYQ/ACSI8AeABBH+AJCgQsPf9nTbq2yvs32/7Y9m45Ns32p7fXZ/cJF1AAAGK3rLf4ekv4yIIyW9UdJHbM+StFjSyoiYKWlltgwAaJNCwz8iNkXEPdnj7ZLWSXqZpIWSLs+edrmkU4qsAwAwWNv2+dvulvQ6ST+UNCUiNkn1fyAkHdKuOgAAbQp/2wdIukbSxyLiV028bpHtHts9W7duLa5AAEhM4eFve4LqwX9VRFybDffZnpb9fZqkLcO9NiIujohaRNS6urqKLhUAklH00T6WdKmkdRHxlQF/ukHSGdnjMyQtK7IOAMBg4wt+/3mS3i/pPturs7FPSVoq6WrbH5T0qKRTC64DADBAoeEfEbdLcoM/H1/kZwMAGuMXvgCQIMIfABJE+ANAggh/AEgQ4Q8ACSL8ASBBhD8AJIjwB4AEEf4AkCDCHwASRPgDQIIIfwBIEOEPAAki/AEgQYQ/ACSI8AeABBH+AJAgwh8AEkT4A0CCCH8ASBDhDwAJIvwBIEHjyy4AGIva+Tdr2zM7NXm/ceo57+Syy2mb7sXLdz/esHR+iZW0F323vm+2/NFxuhcv17ZndkqStj2zc9AXpMqG9knf1VZ034Q/Okrt/JubGq+KRl/8qgchfecbHwvCHx2lf4s/7ziA4RH+6CiT9xvX1DiA4RH+6CiNJnerPunbaLKv6pOf9J1vfCwIf3ScDUvn797Sn7zfuMoHQb+hfdJ3tRXdtyOipW9YlFqtFj09PWWXAQAdxXZvRNSGjrPlDwAJIvwBIEGEPwAkiPAHgAQR/gCQoELD3/ZltrfYXjtgbIntx22vzm7vKLIGAMALFb3l/01Jw/365oKImJPdbiq4BgDAEIWGf0T8QNITRX4GAKB5Ze3zP8v2mmy30MEl1QAAySoj/C+U9EpJcyRtkvTlRk+0vch2j+2erVu3tqk8AKi+tod/RPRFxM6I2CXpEknHjPDciyOiFhG1rq6u9hUJABU34mUcbf+TpIYn/4mIs5v9QNvTImJTtvhuSWtHej4AoPVGu4Zv/5nU5kmaJek/suVTJfWO9ua2vy3pWEmTbf9c0t9KOtb2HNX/Udkg6cxmiwYAvDgjhn9EXC5Jtj8g6biIeC5b/hdJt4z25hFx2jDDlzZf
JgCglfLu8/99SRMHLB+QjQEAOtBou336LZX0E9ursuW3SFpSSEUAgMLlCv+I+Ffb35X0hmxocURsLq4sAECRcu32sW1JJ0h6bUQsk7S37YaHaAIA9mx59/l/XdJcSf0TuNslfa2QigAAhcu7z/8NEXG07Z9IUkT80vbeBdYFAChQ3i3/52yPU/aDL9tdknYVVhUAoFB5w/8fJV0n6RDbn5N0u6TPF1YVAKBQeY/2ucp2r6TjJVnSKRGxrtDKAACFGe3cPi+NiF/ZniRpi6RvD/jbpIjgXP0oRffi5bsfb1g6v8RK2ou+0+r7VYuX63eS9pb0YIv7Hm23z7ey+17Vz/PTf+tfBtpuYBAMt1xV9D38clV1Z8EvSb9T6/seMfwjYkF2jP9bIuKwAbdDI+KwllYC5NDoC1D1QKDvfONV8aoG/TUaH4tRJ3wjIlSf7AUAtMHvmhwfi7xH+9xt+w9b+LkAgAYa/YiqlT+uyhv+x0m6y/bD2bV377O9poV1ALk0muyr+iQgfecbr4pGk7utnPQdNfyzff4fVv26u2+V9E5JC7J7oO2GfvGrHgT96Hv45arasHT+7i39vdX6vl3fpT/Kk+zeiHh9Sz+5SbVaLXp6OMAIAJqR5Xdt6Dj7/AEgQXlP7HacpDNtb5T0tOq/8o2ImF1YZQCAwuQN/7cXWgUAoK3ynttnoyTZPkTSPoVWBAAoXN4reb3L9npJj0j6vqQNkr5bYF0AgALlnfD9rKQ3SnowIg5V/eyedxRWFQCgULkv5hIR/ydpL9t7RcQqSXOKKwsAUKS8E75P2j5A0g8kXWV7i6QdxZUFAChS3i3/hZKelXSOpJslPSx+4QsAHSvv0T5PS/WLu0j6TqEVAQAKlyv8bZ8p6XzVt/53KfuRlyTO6Q8AHSjvPv9PSHp1RGwrshgAQHvk3ef/sKRniiwEANA+ebf8z5V0p+0fSvpt/2BEnF1IVQCAQuUN/4skfU/Sfarv8wcAdLC84b8jIj5eaCUAgLbJu89/le1FtqfZntR/K7QyAEBh8m75/1l2f+6AMQ71BIAOlWvLPyIOHea2O/htv22419m+zPYW22sHjE2yfavt9dn9wS++DQBAM/Lu9hnNFxuMf1PSyUPGFktaGREzJa3MlgEAbdSq8PdwgxHxA0lPDBleKOny7PHlkk5pUQ0AgJxaFf7RxHOnRMQmScruD2n0xGySucd2z9atW19sjQCATKvCvxARcXFE1CKi1tXVVXY5AFAZrQr/DU08t8/2NEnK7re0qAYAQE55r+F7qu2J2ePP2L7W9tH9f4+I9zTxmTdIOiN7fIakZU28FgDQAnm3/P8mIrbbfrOkk1SfqL1wtBfZ/rakuyQdYfvntj8oaamkt2UXhH9btgwAaKO8P/Lamd3Pl3RhRCyzvWS0F0XEaQ3+dHzOzwUAFCBv+D9u+yJJJ0j6ou2XaA+fLE7FRavW6/o1m3TK7Gk687iZZZfTNt2Ll+9+vGHp/BIraa9U+17w1dt0/+an9eqp++vGjx1bdjltc+Wdj2jZmk1aOHua3vemQ1v63o4Y/ShN2/up/mOt+yJifTZR+5qIuKWl1YygVqtFT09Puz6uIxz5mZv07I7n19++4611f/+OEitqj4EB2C+FIKTv56XQ92uX3KynfrNz9/KB+4zTvUuG/mZ2dLZ7I6I2dHzUrXfbe0n6UURcGxHrpfrx+e0MfrzQRavWDwp+SXp2R+iiVetLqqg9hguCkcarItW+F3z1tqbGq+LKOx8ZFPyS9NRvdurKOx9p2WeMGv4RsUvSvbZntOxT8aJdv2ZTU+NAJ7p/89NNjVfFsgbf40bjY5F3v/00SffbXmn7hv5by6pA006ZPa2pcaATvXrq/k2NV8XCBt/jRuNjkTf8r5N0uqTzJX15wA0lOfO4mdp3/OBTKu073pWf9G20r7fq+4BT7bvR5G7VJ33f96ZDdeA+4waNHbjPuJZO+uad8P17Se+VdI+kyyStiDwvbCEm
fIfH0T7VD8CBUu2bo33GfrRPownfXOGfvYElnSjpzyXVJF0t6dKIeHhMFTWJ8AeA5o35aJ9+2Zb+5uy2Q9LBkv7L9pdaViUAoC1y/cjL9tmqn4dnm6RvSPqriHguOwx0vaRPFlciAKDV8v7Cd7Kk90TExoGDEbHL9oLWlwUAKFKu8I+I80b427rWlQMAaAfOzwMACSL8ASBBhD8AJIjwB4AEEf4AkCDCHwASRPgDQIIIfwBIEOEPAAki/AEgQYQ/ACSI8AeABBH+AJAgwh8AEkT4A0CC8l7MBXuoVC/oTd9p9X3BinVatmazFs6eqnNOOrLsctrmob7tWv3Yk5oz/SAdPmViS9879wXcy8YF3F9oYBD0SyEQ6Pt5KfQ989zlem5ATE2wtP4L1e/7vOvv0xV3P7p7+fS5M3T+wtc0/T4v+gLu2LMMFwQjjVcFfecbr4oLVqwbFPyS9FzUx6vsob7tg4Jfkq6461E91Le9ZZ9B+APYYy1bs7mp8apY/diTTY2PBeEPYI+1cPbUpsarYs70g5oaHwvCv0M12tdb9X3A9J1vvCrOOelITfDgsQlW5Sd9D58yUafPnTFo7PS5M1o66cuEb4dL9egP+k6rb472GfvRPo0mfAl/AKiwRuFf2nH+tjdI2i5pp6QdwxUHAChG2T/yOi4itpVcAwAkhwlfAEhQmeEfkm6x3Wt70XBPsL3Ido/tnq1bt7a5PACorjLDf15EHC3p7ZI+YvuPhj4hIi6OiFpE1Lq6utpfIQBUVGnhHxG/yO63SLpO0jFl1QIAqSkl/G3vb3ti/2NJJ0paW0YtAJCiso72mSLpOtv9NXwrIm4uqRYASE4p4R8RP5P02jI+GwDAoZ4AkCTCHwASRPgDQIIIfwBIEOEPAAki/AEgQYQ/ACSI8AeABBH+AJAgwh8AEkT4A0CCCH8ASBDhDwAJKvsC7oXrXrx89+MNS+eXWEkxqt5fI/SdVt8XrFinZWs2a+HsqTrnpCPLLqdtHurbrtWPPak50w/S4VMmtvS9HREtfcOi1Gq16Onpaeo1A78o/ar0hal6f43Q9/NS6Hvmucv13ICYmmBp/Req3/d519+nK+5+dPfy6XNn6PyFr2n6fWz3RkRt6Hhld/sM90UZabzTVL2/Rug733hVXLBi3aDgl6Tnoj5eZQ/1bR8U/JJ0xV2P6qG+7S37jMqGP4DOt2zN5qbGq2L1Y082NT4WhD+APdbC2VObGq+KOdMPamp8LCob/o32hVZlH2nV+2uEvvONV8U5Jx2pCR48NsGq/KTv4VMm6vS5MwaNnT53RksnfSs94StV/+iIqvfXCH2n1TdH+4z9aJ9GE76VD38ASFlyR/sAABoj/AEgQYQ/ACSI8AeABBH+AJAgwh8AEkT4A0CCCH8ASBDhDwAJIvwBIEFcyavDVb2/RlLte+UDm3XLA306cdYUHT+r2me2HKjIK1qlqtLn9qn6lY+q3l8jqfZ94gW36cG+p3cvHzFlf60459jS6mmXVl3RKlXJndun6lc+qnp/jaTa98oHNg8Kfkn6ad/TWvlAtS9q0o4rWqWqsuEPVMktD/Q1NV4V7biiVapKC3/bJ9v+qe2HbC8uqw6gE5w4a0pT41XRjitapaqU8Lc9TtLXJL1d0ixJp9me1crPqPqVj6reXyOp9n38rKk6Ysr+g8aOmLJ/5Sd923FFq1SVMuFre66kJRFxUrZ8riRFxBcavYYreQ2v6v01kmrfHO3D0T7N2qOu5GX7TySdHBF/kS2/X9IbIuKsIc9bJGmRJM2YMeP1GzdubHutANDJ9rSjfTzM2Av+FYqIiyOiFhG1rq6uNpQFAGkoK/x/Lmn6gOWXS/pFSbUAQHLKCv8fS5pp+1Dbe0t6r6QbSqoFAJJTyukdImKH7bMkrZA0TtJlEXF/GbUAQIpKO7dPRNwk6aayPh8AUtYx5/axvVVSJx7uM1nStrKLKAF9pyXVvqU9v/dXRMQLjpjpmPDvVLZ7hjvMquro
Oy2p9i11bu+c2wcAEkT4A0CCCP/iXVx2ASWh77Sk2rfUob2zzx8AEsSWPwAkiPAHgAQR/i1ke7rtVbbX2b7f9kez8Um2b7W9Prs/uOxaW2mEvpfYftz26uz2jrJrbSXb+9j+ke17s77/Lhuv+vpu1Hel13c/2+Ns/8T2jdlyR65v9vm3kO1pkqZFxD22J0rqlXSKpA9IeiIilmZXLTs4Iv66vEpba4S+/1TSryPiH8qsryi2LWn/iPi17QmSbpf0UUnvUbXXd6O+T1aF13c/2x+XVJP00ohYYPtL6sD1zZZ/C0XEpoi4J3u8XdI6SS+TtFDS5dnTLlc9GCtjhL4rLep+nS1OyG6h6q/vRn1Xnu2XS5ov6RsDhjtyfRP+BbHdLel1kn4oaUpEbJLqQSnpkBJLK9SQviXpLNtrbF/WKf8dbka2C2C1pC2Sbo2IJNZ3g76liq9vSV+V9ElJuwaMdeT6JvwLYPsASddI+lhE/KrsetplmL4vlPRKSXMkbZL05fKqK0ZE7IyIOapfk+IY20eVXFJbNOi70uvb9gJJWyKit+xaWoHwb7FsH+g1kq6KiGuz4b5sv3j//vEtZdVXlOH6joi+LCR2SbpE0jFl1likiHhS0m2q7/eu/PruN7DvBNb3PEnvsr1B0r9LeqvtK9Wh65vwb6FsIuxSSesi4isD/nSDpDOyx2dIWtbu2orUqO/+L0Tm3ZLWtru2Itnusn1Q9nhfSSdI+l9Vf30P23fV13dEnBsRL4+IbtUvQPW9iHifOnR9c7RPC9l+s6T/kXSfnt8n+CnV939fLWmGpEclnRoRT5RSZAFG6Ps01XcBhKQNks7s3zdaBbZnqz7BN071DamrI+J827+naq/vRn3/myq8vgeyfaykT2RH+3Tk+ib8ASBB7PYBgAQR/gCQIMIfABJE+ANAggh/AEgQ4Q8ACSL8ASBBhD+Qg+3rbfdm569flI190PaDtm+zfYntf87Gu2xfY/vH2W1eudUDL8SPvIAcbE+KiCey0xn8WNJJku6QdLSk7ZK+J+neiDjL9rckfT0ibrc9Q9KKiDiytOKBYYwvuwCgQ5xt+93Z4+mS3i/p+/0/47f9n5Jelf39BEmz6qc8kiS91PbE7FoHwB6B8AdGkZ3H5QRJcyPiGdu3SfqppEZb83tlz322LQUCY8A+f2B0B0r6ZRb8fyDpjZL2k/QW2wfbHi/pjwc8/xZJZ/Uv2J7TzmKBPAh/YHQ3Sxpve42kz0q6W9Ljkj6v+hlb/1vSA5Keyp5/tqRadkWrByR9uP0lAyNjwhcYI9sHZBcxHy/pOkmXRcR1ZdcF5MGWPzB2S7Lr2K6V9Iik60utBmgCW/4AkCC2/AEgQYQ/ACSI8AeABBH+AJAgwh8AEvT/jpnIAFnhjmYAAAAASUVORK5CYII=\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Scatter plot of the 'age' column (x axis) against the 'yrs_married' column (y axis).\n",
    "df.plot(kind='scatter', x='age', y='yrs_married')\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "866c6905-0737-4d9b-b91d-48fd40481ec9",
   "metadata": {},
   "source": [
    "# 任务4：ChatGPT文本分类"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 49,
   "id": "34fa566b-444e-4c88-86ae-46da717698b0",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>label</th>\n",
       "      <th>review</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "      <td>味道没有传说中好</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>0</td>\n",
       "      <td>太糟了。等了两个小时,牛肉我吃的快吐了,再也不可能第二次</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1</td>\n",
       "      <td>微辣一点都不辣啊</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1</td>\n",
       "      <td>定很多次啦！菜很好吃！也很及时！</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>0</td>\n",
       "      <td>太慢了、恶死了</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   label                        review\n",
       "0      0                      味道没有传说中好\n",
       "1      0  太糟了。等了两个小时,牛肉我吃的快吐了,再也不可能第二次\n",
       "2      1                      微辣一点都不辣啊\n",
       "3      1              定很多次啦！菜很好吃！也很及时！\n",
       "4      0                       太慢了、恶死了"
      ]
     },
     "execution_count": 49,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Both splits share one layout: a 0/1 sentiment label followed by the review text.\n",
    "REVIEW_COLUMNS = ['label', 'review']\n",
    "\n",
    "# Labelled reviews that the few-shot prompt shows to the model as examples.\n",
    "train_rows = [\n",
    "    (0, '辣，饭冷啦，都两三小时才送来慢'),\n",
    "    (0, '足足等了將近一個半小時，飯也基本不太熱了，因為網上付款了所以吃飯就不退送餐費，所以你們就任意遲到，我說的有錯嗎'),\n",
    "    (0, '送餐的师傅，服务质量太差，送过来都已经坨了'),\n",
    "    (1, '送的慢了点'),\n",
    "    (1, '还行，就是速度好慢，一个多小时'),\n",
    "    (0, '好慢，差一点就超时能打五折了'),\n",
    "    (0, '皮太厚，不喜欢'),\n",
    "    (0, '卷饼味道真的很一般'),\n",
    "    (0, '就是太慢了，我12：44才收到，晚了一个小时，饭都凉了。'),\n",
    "    (0, '土豆要多煮一下'),\n",
    "]\n",
    "train = pd.DataFrame(train_rows, columns=REVIEW_COLUMNS)\n",
    "\n",
    "# Reviews whose labels the model is asked to predict.\n",
    "test_rows = [\n",
    "    (0, '味道没有传说中好'),\n",
    "    (0, '太糟了。等了两个小时,牛肉我吃的快吐了,再也不可能第二次'),\n",
    "    (1, '微辣一点都不辣啊'),\n",
    "    (1, '定很多次啦！菜很好吃！也很及时！'),\n",
    "    (0, '太慢了、恶死了'),\n",
    "]\n",
    "test = pd.DataFrame(test_rows, columns=REVIEW_COLUMNS)\n",
    "test"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 61,
   "id": "c9b68430-fa95-4a94-95f1-0f378ee87908",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'id': 'chatcmpl-7qMCoje23tF4qt8qVDyuRvkOE5X1J',\n",
       " 'object': 'chat.completion',\n",
       " 'created': 1692712770,\n",
       " 'model': 'gpt-3.5-turbo-0613',\n",
       " 'choices': [{'index': 0,\n",
       "   'message': {'role': 'assistant', 'content': '0\\n0\\n0\\n1'},\n",
       "   'finish_reason': 'stop'}],\n",
       " 'usage': {'prompt_tokens': 158,\n",
       "  'completion_tokens': 7,\n",
       "  'total_tokens': 165,\n",
       "  'pre_token_count': 4096,\n",
       "  'pre_total': 43,\n",
       "  'adjust_total': 41,\n",
       "  'final_total': 2}}"
      ]
     },
     "execution_count": 61,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Zero-shot classification: the prompt explains the two labels but shows no labelled examples.\n",
    "prompt = \"给定下面几句话:\\n{}\\n其中'0'代表负面句子，'1'代表正面句子。请使用'0'或'1'来区分句子的分类,只需回复0和1\"\n",
    "# Put every test review on its own line before inserting it into the template.\n",
    "review_block = '\\n'.join(test['review'].tolist())\n",
    "response = chat(prompt.format(review_block))\n",
    "response.json()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 62,
   "id": "7d342b1a-0e9c-400a-bbec-138f1a4af9a9",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'id': 'chatcmpl-7qMCqb3TcOUteEsL5EzfGw29bTClM',\n",
       " 'object': 'chat.completion',\n",
       " 'created': 1692712772,\n",
       " 'model': 'gpt-3.5-turbo-0613',\n",
       " 'choices': [{'index': 0,\n",
       "   'message': {'role': 'assistant',\n",
       "    'content': '味道没有传说中好 - 0\\n太糟了。等了两个小时,牛肉我吃的快吐了,再也不可能第二次 - 0\\n微辣一点都不辣啊 - 0\\n定很多次啦！菜很好吃！也很及时！ - 1\\n太慢了、恶死了 - 0'},\n",
       "   'finish_reason': 'stop'}],\n",
       " 'usage': {'prompt_tokens': 524,\n",
       "  'completion_tokens': 106,\n",
       "  'total_tokens': 630,\n",
       "  'pre_token_count': 4096,\n",
       "  'pre_total': 47,\n",
       "  'adjust_total': 42,\n",
       "  'final_total': 5}}"
      ]
     },
     "execution_count": 62,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Few-shot classification: show the labelled training reviews first, then ask for labels of the test reviews.\n",
    "prompt = \"给定下面几句话对应分类如下:\\n{}\\n其中'0'代表负面句子，'1'代表正面句子。请使用'0'或'1'来区分下面句子的分类:\\n{}\"\n",
    "# One demonstration line per training review: index, review text, then its label.\n",
    "train_sample = [\n",
    "    '{}.{},它的分类是{}\\n'.format(idx, row['review'], row['label'])\n",
    "    for idx, row in train.iterrows()\n",
    "]\n",
    "# Join the demonstrations into one string. Passing the list itself to format() would insert its\n",
    "# Python repr (brackets, quotes, escaped newlines) into the prompt instead of readable lines.\n",
    "demonstrations = ''.join(train_sample)\n",
    "response = chat(prompt.format(demonstrations, '\\n'.join(test['review'].tolist())))\n",
    "response.json()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "dc408475-376f-40c0-924b-2d8a3966eeb1",
   "metadata": {},
   "source": [
    "# 任务5：ChatGPT实体抽取"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "f2310fd7-3362-458f-b130-9cf93becfbe0",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|████████████████████████████████████████████████████████████████████████████████████| 4/4 [00:54<00:00, 13.67s/it]\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "{'食物': '根据给定的文本，其中的食物有山药、豌豆、胡萝卜、葱、姜和小米椒。',\n",
       " '时间': '给定的文本中没有明确提及时间。',\n",
       " '动作': '找到的动作有：\\n1. 山药去皮洗净\\n2. 切成小丁\\n3. 用水浸泡\\n4. 豌豆剥好\\n5. 冲洗干净\\n6. 胡萝卜去皮洗净\\n7. 切成小丁\\n8. 准备好葱姜末\\n9. 准备好小米椒碎'}"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Recipe passages to search for entities, one passage per list element.\n",
    "data_list = [\n",
    "    '山药去皮洗净，切成小丁，用水浸泡，避免氧化变色。豌豆剥好，冲洗干净。胡萝卜去皮洗净，切成小丁。葱姜末、小米椒碎准备好。',\n",
    "    '锅内加水，煮开后放入少许盐，倒入豌豆，煮几分钟，至豌豆断生，煮好后捞出沥干水分。',\n",
    "    '另起锅加油，烧热后倒入姜末、小米椒碎爆锅，再倒入胡萝卜、山药翻炒，炒两分钟。',\n",
    "    '最后倒入豌豆翻炒均匀，再加入盐、鸡精、葱末翻炒几下，就可以出锅了，很好吃下饭香，快尝尝吧。',\n",
    "]\n",
    "\n",
    "# Entity categories requested for every passage (food, time, action).\n",
    "target_list = ['食物', '时间', '动作']\n",
    "prompt = \"给定一个文本如下:\\n{}\\n请帮我找到其中的{}。\"\n",
    "\n",
    "# One chat request per (passage, category) pair; the replies are stored as one dict per passage.\n",
    "result_list = []\n",
    "for data in tqdm(data_list):\n",
    "    extracted = {}\n",
    "    # Register the dict before filling it so partial results stay visible if a request fails.\n",
    "    result_list.append(extracted)\n",
    "    for target in target_list:\n",
    "        reply = chat(prompt.format(data, target)).json()\n",
    "        extracted[target] = reply['choices'][0]['message']['content']\n",
    "result_list[0]"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "426f5f83-abf9-4914-a3d6-e9902493a549",
   "metadata": {},
   "source": [
    "# 任务6：ChatGPT关系抽取"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "f9e9da7e-9c4a-4399-ab7d-6069432ad75a",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|████████████████████████████████████████████████████████████████████████████████████| 4/4 [00:27<00:00,  6.97s/it]\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "'王小蒙 - 拥有 - 豆腐厂\\n谢永强 - 专注 - 自己的果园\\n王小蒙 - 见不上 - 谢永强\\n王小蒙 - 推迟 - 婚期\\n谢永强 - 推迟 - 婚期\\n谢广坤 - 是 - 永强的父亲'"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Story synopses used as input for relation extraction; the following cells reuse data_list.\n",
    "data_list = ['王小蒙的豆腐厂上线，每天忙里忙外，不得清闲。谢永强则专注自己的果园，和小蒙几天都见不上一面。两个孩子忙事业始终推迟婚期，急坏永强的父亲谢广坤。',\n",
    "'赵玉田和刘英结婚后，开始忙于花圃的经营。来村中实习的女大学生陈艳南吸引了赵玉田的注意，刘英大吃其醋，无事忙刘能对女婿的做法也颇为不满。',\n",
    "'香秀迎来了新同事王天来，李大国担心嫉妒，从中作梗。王天来单恋陈艳南，又和赵玉田矛盾不断。',\n",
    "'长贵一方面想吸引王大拿来村里投资，一方面又怕谢大脚被大拿抢走，他和大脚之间若即若离，忽冷忽热，闹出不少笑话。']\n",
    "\n",
    "# Ask for the person names in each passage and the relations between them.\n",
    "# The prompt previously said '任命' (appointment), a typo for '人名' (person names).\n",
    "prompt = \"给定一个文本如下:\\n{}\\n请帮我识别文本中的人名，并列出他们的关系。最后输出如下格式：实体 - 关系 - 实体\"\n",
    "result_list = []\n",
    "for text in tqdm(data_list):\n",
    "    reply = chat(prompt.format(text)).json()\n",
    "    # Keep only the assistant's message text from the chat completion payload.\n",
    "    result_list.append(reply['choices'][0]['message']['content'])\n",
    "result_list[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "4fbadc10-f911-4637-8c46-91809eceb3fc",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|████████████████████████████████████████████████████████████████████████████████████| 4/4 [00:11<00:00,  2.97s/it]\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "'王小蒙 - 夫妻 - 谢永强'"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Narrow the request to husband-and-wife relations, reusing the passages already in data_list.\n",
    "prompt = \"给定一个文本如下:\\n{}\\n请帮我识别文本中的夫妻关系。最后输出如下格式：实体 - 关系 - 实体\"\n",
    "result_list = []\n",
    "for passage in tqdm(data_list):\n",
    "    answer = chat(prompt.format(passage)).json()\n",
    "    # Keep only the assistant's message text from the chat completion payload.\n",
    "    result_list.append(answer['choices'][0]['message']['content'])\n",
    "result_list[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "8832d4bc-1a42-438e-a0a4-a129909687ee",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|████████████████████████████████████████████████████████████████████████████████████| 4/4 [00:16<00:00,  4.24s/it]\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "'文本中没有提到任何与“香秀”相关的人物，因此输出无。'"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Ask only for people related to one named character; the prompt tells the model to answer '无' when there is none.\n",
    "prompt = \"给定一个文本如下:\\n{}\\n请帮我识别文本中与“香秀”相关的人物。若存在关系，最后输出如下格式：实体 - 关系 - 实体；若不存在关系则直接输出无。\"\n",
    "result_list = []\n",
    "for passage in tqdm(data_list):\n",
    "    payload = chat(prompt.format(passage)).json()\n",
    "    first_choice = payload['choices'][0]\n",
    "    result_list.append(first_choice['message']['content'])\n",
    "result_list[0]"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "f8269637-5a4a-47ce-b455-a7b73f5cb9e7",
   "metadata": {},
   "source": [
    "# 任务7：ChatGPT渲染表格"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "id": "f498e370-2280-44bb-a209-500102cb4fed",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "以下是根据给定数据绘制的表格：\n",
      "\n",
      "| class | precision | recall | f1-score | support |\n",
      "|-------|-----------|--------|----------|---------|\n",
      "|   0   |   0.50    |  1.00  |   0.67   |    1    |\n",
      "|   1   |   0.00    |  0.00  |   0.00   |    2    |\n",
      "|   2   |   1.00    |  0.67  |   0.80   |    3    |\n"
     ]
    }
   ],
   "source": [
    "# Whitespace-separated metrics text that the model is asked to re-render as a table.\n",
    "table = 'precision recall f1-score support\\nclass 0 0.50 1.00 0.67 1\\nclass 1 0.00 0.00 0.00 2\\nclass 2 1.00 0.67 0.80 3'\n",
    "# The prompt previously began with '跟定', a typo for '给定' (\"given\") used by the other prompts in this notebook.\n",
    "prompt = '给定一个表格数据如下：{}\\n请帮我绘制成表格。'\n",
    "reply = chat(prompt.format(table)).json()\n",
    "print(reply['choices'][0]['message']['content'])"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "eb3085a6-1cc5-4b83-9286-aea1d8ad35fb",
   "metadata": {},
   "source": [
    "# 任务8：ChatGPT文本匹配"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "id": "fd2bd927-ced5-4ccc-b390-a1a781ea2efd",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>0</th>\n",
       "      <th>1</th>\n",
       "      <th>2</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>魔兽世界坐骑去哪买</td>\n",
       "      <td>奥比岛在哪有皇室舞会的邀请函？</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>在淘宝去哪里充值好</td>\n",
       "      <td>在淘宝里怎么买火车票，哪里有</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>鱼不可以和什么蔬菜一起吃</td>\n",
       "      <td>鱼不能和什么药物一起吃</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>读音是什么？怎么组词</td>\n",
       "      <td>熙的读音和组词是什么？</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>七夕到了，单身该怎么过啊？</td>\n",
       "      <td>七夕单身的人怎么过？</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>什么品牌的智能手机好用</td>\n",
       "      <td>现在什么牌子的智能手机好用，又便宜呀</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>一个人怎么过七夕？</td>\n",
       "      <td>七夕一个人怎么过</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "               0                   1  2\n",
       "0      魔兽世界坐骑去哪买     奥比岛在哪有皇室舞会的邀请函？  0\n",
       "1      在淘宝去哪里充值好      在淘宝里怎么买火车票，哪里有  0\n",
       "2   鱼不可以和什么蔬菜一起吃         鱼不能和什么药物一起吃  0\n",
       "3     读音是什么？怎么组词         熙的读音和组词是什么？  1\n",
       "4  七夕到了，单身该怎么过啊？          七夕单身的人怎么过？  1\n",
       "5    什么品牌的智能手机好用  现在什么牌子的智能手机好用，又便宜呀  1\n",
       "6      一个人怎么过七夕？            七夕一个人怎么过  1"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Sentence pairs for text matching: columns 0 and 1 hold the two sentences, column 2 the 0/1 label.\n",
    "# NOTE(review): presumably 1 marks a matching pair and 0 a non-matching one - confirm against the data source.\n",
    "sentence_pairs = [\n",
    "    ('魔兽世界坐骑去哪买', '奥比岛在哪有皇室舞会的邀请函？', 0),\n",
    "    ('在淘宝去哪里充值好', '在淘宝里怎么买火车票，哪里有', 0),\n",
    "    ('鱼不可以和什么蔬菜一起吃', '鱼不能和什么药物一起吃', 0),\n",
    "    ('读音是什么？怎么组词', '熙的读音和组词是什么？', 1),\n",
    "    ('七夕到了，单身该怎么过啊？', '七夕单身的人怎么过？', 1),\n",
    "    ('什么品牌的智能手机好用', '现在什么牌子的智能手机好用，又便宜呀', 1),\n",
    "    ('一个人怎么过七夕？', '七夕一个人怎么过', 1),\n",
    "]\n",
    "data = pd.DataFrame(sentence_pairs)\n",
    "data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 66,
   "id": "899a1ab5-d980-4a67-a16d-fde7ca4f97ab",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>label</th>\n",
       "      <th>sim</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.480602</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0.744537</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>0.872392</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>0.903893</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>4</td>\n",
       "      <td>1</td>\n",
       "      <td>0.897476</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>5</td>\n",
       "      <td>1</td>\n",
       "      <td>0.840700</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>6</td>\n",
       "      <td>1</td>\n",
       "      <td>0.978194</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   id  label       sim\n",
       "0   0      0  0.480602\n",
       "1   1      0  0.744537\n",
       "2   2      0  0.872392\n",
       "3   3      1  0.903893\n",
       "4   4      1  0.897476\n",
       "5   5      1  0.840700\n",
       "6   6      1  0.978194"
      ]
     },
     "execution_count": 66,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 使用bert求相关性\n",
    "from sklearn.metrics.pairwise import cosine_similarity\n",
    "from transformers import AutoTokenizer, AutoModel\n",
    "import torch\n",
    "\n",
    "# 获取embedding\n",
    "def get_bert_emb(text, tokenizer, model):\n",
    "    \"\"\"Return one averaged vector for ``text`` taken from the model's embedding layer.\n",
    "\n",
    "    The text is converted to token ids with ``tokenizer.encode_plus`` and passed to\n",
    "    ``model.embeddings`` only; the full model forward pass is not run here.\n",
    "    The result is averaged over dim 1 (presumably the token axis - TODO confirm)\n",
    "    and the first row is returned as a NumPy array.\n",
    "    \"\"\"\n",
    "    token_ids = tokenizer.encode_plus(text)['input_ids']\n",
    "    # Wrap the ids in an outer list so the tensor has a leading dimension of size 1.\n",
    "    batch = torch.tensor([token_ids])\n",
    "    token_vectors = model.embeddings(input_ids=batch)\n",
    "    pooled = token_vectors.mean(dim=1)[0]\n",
    "    return pooled.detach().numpy()\n",
    "\n",
    "\n",
    "# Load the tokenizer and model from a local checkpoint directory.\n",
    "# NOTE(review): this is a machine-specific absolute path; adjust BERT_MODEL_PATH for your environment.\n",
    "BERT_MODEL_PATH = 'D:/bert/hfl/chinese-roberta-wwm-ext'\n",
    "tokenizer = AutoTokenizer.from_pretrained(BERT_MODEL_PATH)\n",
    "model = AutoModel.from_pretrained(BERT_MODEL_PATH)\n",
    "# Compute the similarity of the two sentences in every pair.\n",
    "embed_data = []\n",
    "for idx, row in data.iterrows():\n",
    "    # Call get_bert_emb, the helper defined above in this cell. The previous name, get_emb,\n",
    "    # is not defined in this cell, so the loop raised NameError on a fresh kernel.\n",
    "    emb_0 = get_bert_emb(row[0], tokenizer, model)\n",
    "    emb_1 = get_bert_emb(row[1], tokenizer, model)\n",
    "    embed_data.append({\n",
    "        'id': idx,\n",
    "        'label': row[2],\n",
    "        # cosine_similarity returns the pairwise matrix of the two vectors; [0][1] is the cross term.\n",
    "        'sim': cosine_similarity([emb_0, emb_1])[0][1]\n",
    "    })\n",
    "pd.DataFrame(embed_data)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 67,
   "id": "8260fb53-aba1-4cdd-8dff-1f69f1eff21c",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>label</th>\n",
       "      <th>sim</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.714390</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0.841420</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>0.946097</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>0.894108</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>4</td>\n",
       "      <td>1</td>\n",
       "      <td>0.970961</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>5</td>\n",
       "      <td>1</td>\n",
       "      <td>0.947144</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>6</td>\n",
       "      <td>1</td>\n",
       "      <td>0.945166</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   id  label       sim\n",
       "0   0      0  0.714390\n",
       "1   1      0  0.841420\n",
       "2   2      0  0.946097\n",
       "3   3      1  0.894108\n",
       "4   4      1  0.970961\n",
       "5   5      1  0.947144\n",
       "6   6      1  0.945166"
      ]
     },
     "execution_count": 67,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "def get_gpt_emb(text):\n",
    "    \"\"\"Fetch the ``text-embedding-ada-002`` embedding of ``text`` from the api2d.net embeddings endpoint.\n",
    "\n",
    "    The Forward Key is read from the ``API2D_API_KEY`` environment variable instead of being\n",
    "    hardcoded, so the secret is not stored in the notebook.\n",
    "    NOTE(review): the key that was previously committed here should be revoked.\n",
    "\n",
    "    Args:\n",
    "        text: Value sent as the ``input`` field of the request body.\n",
    "\n",
    "    Returns:\n",
    "        The ``embedding`` entry of the first item in the response's ``data`` list.\n",
    "\n",
    "    Raises:\n",
    "        RuntimeError: If ``API2D_API_KEY`` is not set or empty.\n",
    "        requests.HTTPError: If the service answers with an error status code.\n",
    "    \"\"\"\n",
    "    import os  # local import: the notebook's import cell is not part of this block\n",
    "\n",
    "    api_key = os.environ.get('API2D_API_KEY')\n",
    "    if not api_key:\n",
    "        raise RuntimeError('Set the API2D_API_KEY environment variable to your API2D Forward Key.')\n",
    "\n",
    "    url = \"https://openai.api2d.net/v1/embeddings\"\n",
    "\n",
    "    headers = {\n",
    "      'Content-Type': 'application/json',\n",
    "      # The 'Bearer' prefix must be kept, separated from the key by a single space.\n",
    "      'Authorization': 'Bearer {}'.format(api_key)\n",
    "    }\n",
    "\n",
    "    data = {\n",
    "        \"model\": \"text-embedding-ada-002\",\n",
    "        \"input\": text\n",
    "    }\n",
    "    # A timeout keeps a stalled connection from hanging the notebook indefinitely.\n",
    "    response = requests.post(url, headers=headers, json=data, timeout=60)\n",
    "    # Surface HTTP errors directly instead of failing later with a KeyError on 'data'.\n",
    "    response.raise_for_status()\n",
    "    return response.json()['data'][0]['embedding']\n",
    "\n",
    "# Score every sentence pair with the cosine similarity of its two GPT embeddings.\n",
    "embed_data = []\n",
    "for idx, row in data.iterrows():\n",
    "    pair_vectors = [get_gpt_emb(row[0]), get_gpt_emb(row[1])]\n",
    "    # Entry [0][1] of the pairwise matrix is the similarity between the first and second vector.\n",
    "    similarity = cosine_similarity(pair_vectors)[0][1]\n",
    "    embed_data.append(dict(id=idx, label=row[2], sim=similarity))\n",
    "pd.DataFrame(embed_data)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b127e61f-b38b-4b04-b58b-7c8ec6442078",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
